In [1]:
import mlagents
from mlagents_envs.environment import UnityEnvironment as UE
import numpy as np
from mlagents_envs.environment import ActionTuple


In [2]:
# initialise envs

# env =  UE(file_name="stage0_160523\stage0_copy",seed=1,side_channels=[])
file_name_train = "C:\\Users\\Palaash.HPZ\\Desktop\\RL-concept-learning_large_build_envs\\build_envs\\windows\\S2 180723\\build"
file_name_test = "C:\\Users\\Palaash.HPZ\\Desktop\\RL-concept-learning_large_build_envs\\build_envs\\windows\\S2_test 180723\\build"
env_train =  UE(file_name=file_name_train,seed=1,side_channels=[],worker_id=8,no_graphics = False)
env_train.reset()
env_test =  UE(file_name=file_name_test,seed=1,side_channels=[],worker_id=9,no_graphics = False)
env_test.reset()

In [3]:
# env_train.close()
# env_test.close()

In [4]:
# model
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

vision_output_dim = 3136
num_words = 35  # Number of unique words in the vocabulary
language_output_dim = 128
embedding_dim = 128
mixing_dim = 256
lstm_hidden_dim = 256
num_actions = 4

# (3,128,128) --> (64,7,7) = 3136 (3-layer CNN)
class VisualModule(nn.Module): 
    def __init__(self):
        super(VisualModule, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=3, padding=0),
            nn.ReLU()
        )
        # self.conv = nn.Sequential(
        #     nn.Conv2d(3, 32, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 128, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(128, 64, kernel_size=5, stride=2, padding=1),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        # )

    def forward(self, vt):
        encoded_vt = self.conv(vt)
        return encoded_vt.view(vt.size(0), -1).squeeze()

# one-hot encoding [0 0 1 0 0] --> 128 dimensional embedding (FF)
# S1:5 S2:5 S3:11 S4:9 --> 30 + 5 (noun) = 35 in total
class LanguageModule(nn.Module): 
    def __init__(self, num_words, embedding_dim):
        super(LanguageModule, self).__init__()
        self.embedding = nn.Linear(num_words, embedding_dim)

    def forward(self, lt):
        embedded_lt = self.embedding(lt)
        return embedded_lt

# 3136(vision) + 128 (language) --> 256 dimensional embedding (FF)
class MixingModule(nn.Module):
    def __init__(self, vision_output_dim, language_output_dim, mixing_dim):
        super(MixingModule, self).__init__()
        self.linear = nn.Linear(vision_output_dim + language_output_dim, mixing_dim)

    def forward(self, vision_output, language_output):
        combined_output = torch.cat((vision_output, language_output), dim=0)
        mixed_output = self.linear(combined_output)
        return mixed_output

class LSTMModule(nn.Module):
    def __init__(self,mixing_dim,lstm_hidden_dim):
        super(LSTMModule, self).__init__()
        self.lstm = nn.LSTMCell(mixing_dim, lstm_hidden_dim)
    
    def forward(self,mixed_output,lstm_hidden_state):
        lstm_hidden_state = self.lstm(mixed_output, lstm_hidden_state) 
        # lstm_output = lstm_hidden_state[0] # output is (hidden_state,cell_state), we need hidden state, shape (1,256)
        return lstm_hidden_state

class Agent(nn.Module):
    def __init__(self, num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions):
        super(Agent, self).__init__()
        self.language_module = LanguageModule(num_words, embedding_dim)
        self.visual_module = VisualModule()
        self.mixing_module = MixingModule(vision_output_dim, language_output_dim, mixing_dim)
        self.lstm_module = LSTMModule(mixing_dim, lstm_hidden_dim)
        self.action_predictor = nn.Linear(lstm_hidden_dim, num_actions)
        self.value_estimator = nn.Linear(lstm_hidden_dim, 1)

    def forward(self, vt, lt, lstm_hidden_state):
        vision_output = self.visual_module(vt)
        language_output = self.language_module(lt)
        mixed_output = self.mixing_module(vision_output, language_output).unsqueeze(0)
        lstm_output = self.lstm_module(mixed_output,lstm_hidden_state)
        action_probs = self.action_predictor(lstm_output[0]) 
        value_estimate = self.value_estimator(lstm_output[0])
        return action_probs,value_estimate,lstm_output
        
        
    def save(self, episode, ALG_NAME, ENV_ID):
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        if not os.path.exists(path):
            os.makedirs(path)
        torch.save(self.state_dict(), os.path.join(path, f'agent_{episode}.pt'))

    def load(self, episode, ALG_NAME, ENV_ID):
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        self.load_state_dict(torch.load(os.path.join(path, f'agent_{episode}.pt')))    

        


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# function to test model on test env
def test(agent,test_episode,test_episode_reward,test_average_reward,test_steps,test_actor_loss,test_critic_loss,test_entropy_loss,test_total_loss):
    env = env_test
    TEST_EPISODES = 100
    tracked_agent = -1
    entropy_term = 0
    for episode in range(TEST_EPISODES):
        test_episode += 1
        t0 = time.time()
        episode_reward = 0
        # env.reset()
        behavior_name=list(env.behavior_specs)[0]
        spec=env.behavior_specs[behavior_name]
        # state = env.reset().astype(np.float32)
        STEPS = 0
        decision_steps, terminal_steps = env.get_steps(behavior_name)
        # state -- vt, lt, lstm
        vt = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
        index1 = int(decision_steps.obs[1][0][0])
        index2 = int(decision_steps.obs[1][0][1])+5
        print(f'TEST: ---{hashmap[index2]} {hashmap[index1]}---')
        # 0-capsule,1-cube,2-cylinder,3-prism,4-sphere 
        lt = torch.zeros(35).to(device)
        lt[index1],lt[index2] = 1,1
        lstm_hidden_state = (torch.zeros(1, lstm_hidden_dim).to(device), torch.zeros(1, lstm_hidden_dim).to(device))
        done = False
        while True:

            # Need to use when calculating the loss
            log_probs = []
            # values = []
            values = torch.empty(0).to(device)
            rewards = []

            
            lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
            STEPS += 1
            policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
            # value = value.detach()
            dist = F.softmax(policy_dist.detach(),dim=1).cpu().numpy()
            

            action_dist = Categorical(F.softmax(policy_dist.detach(),dim=1))
            # action_dist = Categorical(F.softmax(policy_dist,dim=1))
            action = action_dist.sample() # sample an action from action_dist
            action_onehot = F.one_hot(torch.tensor(action),num_actions).cpu()
            
            log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
            # log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
            # entropy = -np.sum(np.mean(dist)* np.log(dist))
            entropy = F.cross_entropy(policy_dist.detach(), action)

            discrete_actions = np.array(action_onehot).reshape(1,4)*speed
            action_tuple = ActionTuple()
            action_tuple.add_discrete(discrete_actions)
            env.set_actions(behavior_name,action_tuple)
            env.step()
            decision_steps, terminal_steps = env.get_steps(behavior_name)

            if tracked_agent == -1 and len(decision_steps) >= 1:
                tracked_agent = decision_steps.agent_id[0]
                # print(tracked_agent)

            if tracked_agent in terminal_steps: # roll over or hit the target
                print('TEST: Agent in terminal steps')
                done = True
                reward = terminal_steps[tracked_agent].reward
                if reward > 0:
                    pass
                else: reward = -1 # roll over or other unseen conditions

                print(f'TEST: Terminal Step reward: {reward}')

            elif tracked_agent in decision_steps: # the agent which requires action
                reward = decision_steps[tracked_agent].reward
                # print(f'Decision Step reward: {reward}')
                if reward<0:
                    print(f'TEST: Decision Step reward: {reward}')
            if STEPS >= MAX_STEPS:
                reward = -10
                print(f'TEST: Max Step Reward: {reward}')
                env.reset()
                done = True
            if STEPS % 100 == 0:
                print (f'TEST: Step: {STEPS}')

            episode_reward = episode_reward + reward

            rewards.append(reward)
            # values.append(value)
            values = torch.cat((values, value), dim=0)
            log_probs.append(log_prob)
            entropy_term = entropy_term + entropy
            vt_new = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
            vt = vt_new

            if done:
                # _, Qval,_ = agent(vt_new,lt,lstm_hidden_state)
                # Qval = Qval.detach()
                break
            
            
        discounted_rewards = np.zeros_like(values.cpu().detach().numpy())
        cumulative = 0
        for t in reversed(range(len(rewards))):
            cumulative = rewards[t] + LAM * cumulative # Monte Carlo
            discounted_rewards[t] = cumulative
        # print(f'rewards:{rewards}, discounted_rewards:{discounted_rewards}')
        # Advantage Actor Critic

        # Qvals[-1] = rewards[t] + LAM * Qval      or       Qvals[-1] = rewards[t]                   
        # for t in range(len(rewards)-1):
        #         Qvals[t] = rewards[t] + LAM * values[t+1]
        
        # r_(t+1) = R(s_t|a_t)--> reward[t]        a_t, V_t = agent(s_t)
        # A_t = r_(t+1) + LAM * V_(t+1) - V_t 
        #     = Q_t - V_t
        
        # Monte Carlo Advantage = reward + LAM * cumulative_reward
        # Actor_loss = -log(pai(s_t|a_t))*A_t
        # Critic_loss = A_t.pow(2) *0.5
        # Entropy_loss = -F.entropy(pai(St),index) * 0.001

        # entropy = -np.sum(np.mean(dist) * np.log(dist))
        
        #update actor critic
        
        # values = torch.FloatTensor(values).requires_grad_(True).to(device)
        discounted_rewards = torch.FloatTensor(discounted_rewards.astype(np.float32)).to(device)
        log_probs = torch.stack(log_probs)
        advantage = discounted_rewards - values
        actor_loss = (-log_probs * advantage).mean()
        critic_loss = 0.5 * torch.square(advantage).mean()
        entropy_term /= STEPS
        entropy_loss = -0.1 * entropy_term
        ac_loss = actor_loss + critic_loss + entropy_loss
        test_episode_reward.append(float(episode_reward))
        test_steps.append(STEPS)
        test_actor_loss.append(float(actor_loss))
        test_critic_loss.append(float(critic_loss))
        test_entropy_loss.append(float(entropy_loss))
        test_total_loss.append(float(ac_loss))

        if test_episode >= 100:
            avg_score = np.mean(test_episode_reward[-100:])
            test_average_reward.append(avg_score)
            print('Testing  | Episode: {}/{}  | Episode Reward: {:.0f}  | Average Reward {:.2f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                .format(episode + 1, TEST_EPISODES, episode_reward, avg_score, actor_loss, critic_loss,entropy_loss,  ac_loss, STEPS))
        else:  print('Testing  | Episode: {}/{}  | Episode Reward: {:.0f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                .format(episode + 1, TEST_EPISODES, episode_reward, actor_loss, critic_loss, entropy_loss,  ac_loss, STEPS))
    return test_episode,test_episode_reward,test_average_reward,test_steps,test_actor_loss,test_critic_loss,test_entropy_loss,test_total_loss
                

In [None]:
# training loop
import argparse
import time
import json
# import matplotlib.pyplot as plt
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical

entropy_term = 0
# add arguments in command --train/test
# parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
# parser.add_argument('--train', dest='train', action='store_true', default=False)
# parser.add_argument('--test', dest='test', action='store_true', default=True)
# args = parser.parse_args()
train = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device) 

S0_ALG_NAME = 'S2'
S0_ENV_ID = '22'
S0_episode = 48000

ALG_NAME = 'S2'
ENV_ID = '25'
TRAIN_EPISODES = 68000  # number of overall episodes for training  # number of overall episodes for testing
MAX_STEPS = 500  # maximum time step in one episode
LAM = 0.95  # reward discount in TD error
lr = 2.5e-5  #0.00005 
speed = 3
num_steps = 250 # the step for updating the network
test_episode = 0
if __name__ == '__main__':
    agent = Agent(num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions)
    agent.load(S0_episode,S0_ALG_NAME,S0_ENV_ID)
    agent.to(device)
    optimizer = optim.RMSprop(agent.parameters(), lr=lr)
    best_score = float('-inf')
    hashmap = {
        0: 'capsule',
        1: 'cube',
        2: 'cylinder',
        3: 'prism',
        4: 'sphere',
        5: 'red',
        6: 'green',
        7: 'blue',
        8: 'yellow',
        9: 'black'}
    if train:
        entropy_term = 0
        test_episode_reward = []
        test_average_reward = []
        test_steps = []
        test_actor_loss = []
        test_critic_loss = []
        test_entropy_loss = []
        test_total_loss = []
        tracked_agent = -1
        test_episode = 0
        all_episode_reward = []
        all_average_reward = []
        all_steps = []
        all_actor_loss = []
        all_critic_loss = []
        all_entropy_loss = []
        all_total_loss = []
        env = env_train
        for episode in range(TRAIN_EPISODES):
            t0 = time.time()
            episode_reward = 0
            # env.reset()
            behavior_name=list(env.behavior_specs)[0]
            spec=env.behavior_specs[behavior_name]
            # state = env.reset().astype(np.float32)
            STEPS = 0

            decision_steps, terminal_steps = env.get_steps(behavior_name)
            # state -- vt, lt, lstm
            vt = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
            index1 = int(decision_steps.obs[1][0][0])
            index2 = int(decision_steps.obs[1][0][1])+5
            print()
            print(f'---{hashmap[index2]} {hashmap[index1]}---')
            # 0-capsule,1-cube,2-cylinder,3-prism,4-sphere 
            lt = torch.zeros(35).to(device)
            lt[index1],lt[index2] = 1,1
            lstm_hidden_state = (torch.zeros(1, lstm_hidden_dim).to(device), torch.zeros(1, lstm_hidden_dim).to(device))
            done = False
            while True:

                # Need to use when calculating the loss
                log_probs = []
                # values = []
                values = torch.empty(0).to(device)
                rewards = []

                for steps in range(num_steps):
                    lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
                    STEPS += 1
                    policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
                    # value = value.detach()
                    dist = F.softmax(policy_dist.detach(),dim=1).cpu().numpy()
                    

                    action_dist = Categorical(F.softmax(policy_dist.detach(),dim=1))
                    # action_dist = Categorical(F.softmax(policy_dist,dim=1))
                    action = action_dist.sample() # sample an action from action_dist
                    action_onehot = F.one_hot(torch.tensor(action),num_actions).cpu()
                    
                    log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
                    # log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
                    # entropy = -np.sum(np.mean(dist)* np.log(dist))
                    entropy = F.cross_entropy(policy_dist.detach(), action)

                    discrete_actions = np.array(action_onehot).reshape(1,4)*speed
                    action_tuple = ActionTuple()
                    action_tuple.add_discrete(discrete_actions)
                    env.set_actions(behavior_name,action_tuple)
                    env.step()
                    decision_steps, terminal_steps = env.get_steps(behavior_name)

                    if tracked_agent == -1 and len(decision_steps) >= 1:
                        tracked_agent = decision_steps.agent_id[0]
                        # print(tracked_agent)

                    if tracked_agent in terminal_steps: # roll over or hit the target
                        print('Agent in terminal steps')
                        done = True
                        reward = terminal_steps[tracked_agent].reward
                        if reward > 0:
                            pass
                        else: reward = -1 # roll over or other unseen conditions

                        print(f'Terminal Step reward: {reward}')

                    elif tracked_agent in decision_steps: # the agent which requires action
                        reward = decision_steps[tracked_agent].reward
                        # print(f'Decision Step reward: {reward}')
                        if reward<0:
                            print(f'Decision Step reward: {reward}')
                    if STEPS >= MAX_STEPS:
                        reward = -10
                        print(f'Max Step Reward: {reward}')
                        env.reset()
                        done = True
                    if STEPS % num_steps == 0:
                        print (f'Step: {STEPS}')

                    episode_reward = episode_reward + reward

                    rewards.append(reward)
                    # values.append(value)
                    values = torch.cat((values, value), dim=0)
                    log_probs.append(log_prob)
                    entropy_term = entropy_term + entropy
                    vt_new = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
                    vt = vt_new

                    if done or steps == num_steps-1:
                        # _, Qval,_ = agent(vt_new,lt,lstm_hidden_state)
                        # Qval = Qval.detach()
                        break
                
                
                discounted_rewards = np.zeros_like(values.cpu().detach().numpy())
                cumulative = 0
                for t in reversed(range(len(rewards))):
                    cumulative = rewards[t] + LAM * cumulative # Monte Carlo
                    discounted_rewards[t] = cumulative
                # print(f'rewards:{rewards}, discounted_rewards:{discounted_rewards}')
                # Advantage Actor Critic

                # Qvals[-1] = rewards[t] + LAM * Qval      or       Qvals[-1] = rewards[t]                   
                # for t in range(len(rewards)-1):
                #         Qvals[t] = rewards[t] + LAM * values[t+1]
                
                # r_(t+1) = R(s_t|a_t)--> reward[t]        a_t, V_t = agent(s_t)
                # A_t = r_(t+1) + LAM * V_(t+1) - V_t 
                #     = Q_t - V_t
                
                # Monte Carlo Advantage = reward + LAM * cumulative_reward
                # Actor_loss = -log(pai(s_t|a_t))*A_t
                # Critic_loss = A_t.pow(2) *0.5
                # Entropy_loss = -F.entropy(pai(St),index) * 0.001

                # entropy = -np.sum(np.mean(dist) * np.log(dist))
                
                #update actor critic
                
                # values = torch.FloatTensor(values).requires_grad_(True).to(device)
                discounted_rewards = torch.FloatTensor(discounted_rewards.astype(np.float32)).to(device)
                log_probs = torch.stack(log_probs)
                advantage = discounted_rewards - values
                actor_loss = (-log_probs * advantage).mean()
                critic_loss = 0.5 * torch.square(advantage).mean()
                entropy_term /= num_steps
                entropy_loss = -0.1 * entropy_term
                ac_loss = actor_loss + critic_loss + entropy_loss
                # ac_loss = values.mean()
                optimizer.zero_grad()
                ac_loss.backward()
                optimizer.step()
                # for name, param in agent.named_parameters():
                #     if param.grad is not None:
                #         print(name, param.grad)
                #     else:
                #         print(name, "gradients not computed")
                # for name, param in agent.named_parameters():
                #     if name == 'value_estimator.weight':
                #         print(name, param)
                
                
                if done: break


            all_episode_reward.append(float(episode_reward))
            all_steps.append(STEPS)
            all_actor_loss.append(float(actor_loss))
            all_critic_loss.append(float(critic_loss))
            all_entropy_loss.append(float(entropy_loss))
            all_total_loss.append(float(ac_loss))
            if episode >= 100:
                avg_score = np.mean(all_episode_reward[-100:])
                all_average_reward.append(avg_score)
                if avg_score > best_score:
                    best_score = avg_score
                    agent.save(episode, ALG_NAME, ENV_ID)
                    print(f'-----The best score for averaging previous 100 episode reward is {best_score}. Model has been saved-----')
                print('Training  | Episode: {}/{}  | Episode Reward: {:.0f}  | Average Reward {:.2f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                    .format(episode + 1, TRAIN_EPISODES, episode_reward, avg_score, actor_loss, critic_loss,entropy_loss,  ac_loss, STEPS))
            else:  print('Training  | Episode: {}/{}  | Episode Reward: {:.0f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                    .format(episode + 1, TRAIN_EPISODES, episode_reward, actor_loss, critic_loss, entropy_loss,  ac_loss, STEPS))
            if episode%500 == 0:
                    agent.save(episode, ALG_NAME, ENV_ID)
                    print("Model has been saved")
            if episode%100 == 0:
                test_episode,test_episode_reward,test_average_reward,test_steps,test_actor_loss,test_critic_loss,test_entropy_loss,test_total_loss = test(agent,test_episode,test_episode_reward,test_average_reward,test_steps,test_actor_loss,test_critic_loss,test_entropy_loss,test_total_loss)

        print(all_average_reward)
        agent.save(episode ,ALG_NAME, ENV_ID)
        print("Model has been saved")

        data = {
                    'all_average_reward': all_average_reward,
                    'all_episode_reward': all_episode_reward,
                    'all_actor_loss': all_actor_loss,
                    'all_critic_loss': all_critic_loss,
                    'all_entropy_loss': all_entropy_loss,
                    'all_total_loss': all_total_loss,
                    'all_steps': all_steps,
                } 
        file_path = f'result/{ALG_NAME}_{ENV_ID}_train.txt'
        with open(file_path, 'w') as file:
            json.dump(data, file)
        
        test_data = {
                    'all_average_reward': test_average_reward,
                    'all_episode_reward': test_episode_reward,
                    'all_actor_loss': test_actor_loss,
                    'all_critic_loss': test_critic_loss,
                    'all_entropy_loss': test_entropy_loss,
                    'all_total_loss': test_total_loss,
                    'all_steps': test_steps,
                } 
        file_path = f'result/{ALG_NAME}_{ENV_ID}_test.txt'
        with open(file_path, 'w') as file:
            json.dump(test_data, file)



cuda

---blue prism---




Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1/68000  | Episode Reward: 10  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6
Model has been saved
TEST: ---black sphere---




TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 1/100  | Episode Reward: 10  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0001  | Total Loss: 0.02 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 2/100  | Episode Reward: 10  | Actor loss: 0.04 | Critic loss: 4.35 | Entropy loss: -0.0032  | Total Loss: 4.38 | Total Steps: 41
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 3/100  | Episode Reward: 10  | Actor loss: 0.00 | Critic loss: 0.17 | Entropy loss: -0.0006  | Total Loss: 0.17 | Total Steps: 31
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 4/100  | Episode Reward: 5  | Actor loss: 0.00 | Critic loss: 0.06 | Entropy loss: -0.0029  | Total Loss: 0.06 | Total Steps: 43
TEST: --

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 33/100  | Episode Reward: 10  | Actor loss: 0.00 | Critic loss: 1.09 | Entropy loss: -0.0047  | Total Loss: 1.09 | Total Steps: 41
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 34/100  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 7.07 | Entropy loss: -0.0057  | Total Loss: 7.07 | Total Steps: 6
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 35/100  | Episode Reward: 10  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0010  | Total Loss: 0.03 | Total Steps: 6
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 36/100  | Episode Reward: 10  | Actor loss: 0.05 | Critic loss: 4.42 | Entropy loss: -0.0035  | Total Loss: 4.46 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Ste

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 65/100  | Episode Reward: 10  | Actor loss: 0.20 | Critic loss: 3.87 | Entropy loss: -0.0074  | Total Loss: 4.06 | Total Steps: 31
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 66/100  | Episode Reward: 10  | Actor loss: 5.18 | Critic loss: 7.09 | Entropy loss: -0.0590  | Total Loss: 12.21 | Total Steps: 7
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 67/100  | Episode Reward: 8  | Actor loss: 0.00 | Critic loss: 0.46 | Entropy loss: -0.0023  | Total Loss: 0.46 | Total Steps: 47
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 68/100  | Episode Reward: 10  | Actor loss: 0.32 | Critic loss: 4.17 | Entropy loss: -0.0074  | Total Loss: 4.48 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 97/100  | Episode Reward: 10  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0006  | Total Loss: -0.00 | Total Steps: 31
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 10  | Actor loss: 0.00 | Critic loss: 0.52 | Entropy loss: -0.0014  | Total Loss: 0.52 | Total Steps: 36
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 2  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0054  | Total Loss: 0.01 | Total Steps: 52
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 100/100  | Episode Reward: 8  | Average Reward 8.54  | Actor loss: 0.11

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 33/68000  | Episode Reward: 10  | Actor loss: -0.02 | Critic loss: 0.35 | Entropy loss: -0.0004  | Total Loss: 0.32 | Total Steps: 41

---green capsule---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 34/68000  | Episode Reward: 8  | Actor loss: -0.24 | Critic loss: 2.67 | Entropy loss: -0.0013  | Total Loss: 2.43 | Total Steps: 46

---blue capsule---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 35/68000  | Episode Reward: 8  | Actor loss: -0.11 | Critic loss: 2.10 | Entropy loss: -0.0009  | Total Loss: 1.99 | Total Steps: 47

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 36/68000  | Episode Reward: 10  | Actor loss: 0.00 | Critic loss: 0.87 | Entropy loss: -0.0000  | Total Loss: 0.87 | Total Steps: 31

---black cylinder---
Agent in terminal steps
Terminal Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 69/68000  | Episode Reward: 10  | Actor loss: 0.00 | Critic loss: 0.61 | Entropy loss: -0.0001  | Total Loss: 0.61 | Total Steps: 31

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 70/68000  | Episode Reward: 10  | Actor loss: 0.00 | Critic loss: 0.57 | Entropy loss: -0.0000  | Total Loss: 0.57 | Total Steps: 31

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 71/68000  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 0.09 | Entropy loss: -0.0001  | Total Loss: 0.10 | Total Steps: 36

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 72/68000  | Episode Reward: 10  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 73/68000  | Episode Reward: 10  

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 4/100  | Episode Reward: 10  | Average Reward 8.56  | Actor loss: 0.00 | Critic loss: 0.78 | Entropy loss: -0.0009  | Total Loss: 0.78 | Total Steps: 31
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 5/100  | Episode Reward: 8  | Average Reward 8.54  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0120  | Total Loss: -0.00 | Total Steps: 47
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 6/100  | Episode Reward: 10  | Average Reward 8.54  | Actor loss: 2.27 | Critic loss: 11.07 | Entropy loss: -0.0090  | Total Loss: 13.34 | Total Steps: 68
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 7/100  | Episode Reward: 10  | Average Reward 8.62  | Actor loss: 0.01 | Critic loss: 7.68 | Entrop

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 33/100  | Episode Reward: 8  | Average Reward 8.39  | Actor loss: -0.00 | Critic loss: 0.18 | Entropy loss: -0.0005  | Total Loss: 0.18 | Total Steps: 38
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 34/100  | Episode Reward: 10  | Average Reward 8.39  | Actor loss: 0.00 | Critic loss: 0.73 | Entropy loss: -0.0007  | Total Loss: 0.73 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 35/100  | Episode Reward: 10  | Average Reward 8.39  | Actor loss: 0.11 | Critic loss: 12.53 | Entropy loss: -0.0669  | Total Loss: 12.57 | Total Steps: 7
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 36/100  | Episode Reward: 10  | Average Reward 8.39  | Actor loss: 0.00 | Critic loss: 0.00 | Entrop

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 63/100  | Episode Reward: 10  | Average Reward 8.39  | Actor loss: 0.00 | Critic loss: 0.22 | Entropy loss: -0.0151  | Total Loss: 0.20 | Total Steps: 36
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 64/100  | Episode Reward: 10  | Average Reward 8.44  | Actor loss: 0.00 | Critic loss: 0.37 | Entropy loss: -0.0026  | Total Loss: 0.37 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 65/100  | Episode Reward: 10  | Average Reward 8.44  | Actor loss: 0.00 | Critic loss: 3.41 | Entropy loss: -0.0017  | Total Loss: 3.41 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 66/100  | Episode Reward: 10  | Average Reward 8.44  | Actor loss: 0.01 | Critic loss: 3.86 | Entropy loss: -0.0004  | Total Loss: 

TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 93/100  | Episode Reward: 2  | Average Reward 8.15  | Actor loss: 4.65 | Critic loss: 13.00 | Entropy loss: -0.0122  | Total Loss: 17.63 | Total Steps: 113
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 94/100  | Episode Reward: 10  | Average Reward 8.15  | Actor loss: 0.00 | Critic loss: 2.09 | Entropy loss: -0.0012  | Total Loss: 2.09 | Total Steps: 36
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 5  | Average Reward 8.15  | Actor loss: 0.00 | Critic loss: 0.57 | Entropy loss: -0.0030  | Total Loss: 0.57 | Total Steps: 47
TEST: ---yellow prism---

Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 100 episode reward is 9.625. Model has been saved-----
Training  | Episode: 125/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.00 | Critic loss: 0.32 | Entropy loss: -0.0001  | Total Loss: 0.33 | Total Steps: 31

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 126/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 127/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.00 | Critic loss: 0.04 | Entropy loss: -0.0000  | Total Loss: 0.04 | Total Steps: 36

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 128/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.00 | Cri

Decision Step reward: -2.5
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 156/68000  | Episode Reward: 5  | Average Reward 9.68  | Actor loss: -0.32 | Critic loss: 5.81 | Entropy loss: -0.0022  | Total Loss: 5.49 | Total Steps: 46

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 157/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.09 | Entropy loss: -0.0000  | Total Loss: 0.10 | Total Steps: 6

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 158/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.01 | Critic loss: 0.35 | Entropy loss: -0.0007  | Total Loss: 0.35 | Total Steps: 32

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 159/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.06 | Critic loss: 0.37 | Entropy loss: -0.0041  | Total Los

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 189/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: -0.00 | Critic loss: 0.34 | Entropy loss: -0.0000  | Total Loss: 0.34 | Total Steps: 31

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 190/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: 0.01 | Critic loss: 0.30 | Entropy loss: -0.0001  | Total Loss: 0.31 | Total Steps: 36

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 191/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: 0.02 | Critic loss: 0.20 | Entropy loss: -0.0017  | Total Loss: 0.21 | Total Steps: 37

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 192/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: 0.00 | Critic loss: 0.43 | Entropy loss: -0.0001  | Total Loss: 0.43 | Total Steps: 31

---black capsule---
Ag

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 19/100  | Episode Reward: 8  | Average Reward 8.46  | Actor loss: -0.00 | Critic loss: 0.12 | Entropy loss: -0.0002  | Total Loss: 0.12 | Total Steps: 38
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 20/100  | Episode Reward: 10  | Average Reward 8.46  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0018  | Total Loss: -0.00 | Total Steps: 36
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 21/100  | Episode Reward: 10  | Average Reward 8.46  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0003  | Total Loss: 0.02 | Total Steps: 6
TEST: ---yellow prism---
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 22/100  | Episode Reward: 6  | Av

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 48/100  | Episode Reward: 10  | Average Reward 8.18  | Actor loss: 0.00 | Critic loss: 1.72 | Entropy loss: -0.0001  | Total Loss: 1.72 | Total Steps: 31
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 49/100  | Episode Reward: 10  | Average Reward 8.18  | Actor loss: 0.00 | Critic loss: 1.55 | Entropy loss: -0.0017  | Total Loss: 1.55 | Total Steps: 6
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 50/100  | Episode Reward: 2  | Average Reward 8.11  | Actor loss: 0.00 | Critic loss: 0.25 | Entropy loss: -0.0027  | Total Loss: 0.25 | Total Steps: 52
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 51/100  | Episode Reward: 10  | Aver

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 78/100  | Episode Reward: 10  | Average Reward 8.13  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0011  | Total Loss: 0.08 | Total Steps: 36
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 79/100  | Episode Reward: 10  | Average Reward 8.13  | Actor loss: 0.00 | Critic loss: 1.03 | Entropy loss: -0.0147  | Total Loss: 1.02 | Total Steps: 33
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 8  | Average Reward 8.13  | Actor loss: 0.00 | Critic loss: 3.57 | Entropy loss: -0.0006  | Total Loss: 3.57 | Total Steps: 47
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 81/100  | Episode Reward: 5  | Av

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 210/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: -0.02 | Critic loss: 0.56 | Entropy loss: -0.0017  | Total Loss: 0.54 | Total Steps: 44

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 211/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.12 | Entropy loss: -0.0000  | Total Loss: 0.12 | Total Steps: 6

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 212/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.13 | Entropy loss: -0.0001  | Total Loss: 0.13 | Total Steps: 37

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 213/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.36 | Entropy loss: -0.0001  | Total Loss: 0.36 | Total Steps: 36

---blue cube---
Agent in te

Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 243/68000  | Episode Reward: 8  | Average Reward 9.69  | Actor loss: -0.36 | Critic loss: 2.49 | Entropy loss: -0.0026  | Total Loss: 2.13 | Total Steps: 47

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 244/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: 0.00 | Critic loss: 0.23 | Entropy loss: -0.0001  | Total Loss: 0.23 | Total Steps: 36

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 245/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 246/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: 0.00 | Critic loss: 0.51 | Entropy loss: -0.0000  | Total Loss: 0.51 | Total Steps:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 276/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: -0.00 | Critic loss: 0.52 | Entropy loss: -0.0001  | Total Loss: 0.51 | Total Steps: 31

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 277/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.01 | Critic loss: 0.19 | Entropy loss: -0.0001  | Total Loss: 0.20 | Total Steps: 36

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 278/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: -0.01 | Critic loss: 0.33 | Entropy loss: -0.0012  | Total Loss: 0.31 | Total Steps: 41

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 279/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: -0.02 | Critic loss: 0.48 | Entropy loss: -0.0003  | Total Loss: 0.46 | Total Steps: 31

---red cylinder---
Agent in 

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 7/100  | Episode Reward: 5  | Average Reward 8.55  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0013  | Total Loss: 0.02 | Total Steps: 42
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 8/100  | Episode Reward: 10  | Average Reward 8.58  | Actor loss: 0.27 | Critic loss: 17.27 | Entropy loss: -0.0267  | Total Loss: 17.51 | Total Steps: 6
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 9/100  | Episode Reward: 10  | Average Reward 8.58  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0045  | Total Loss: -0.00 | Total Steps: 6
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 10/100  | Episode Reward: 10  | Average Reward 8.58  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 38/100  | Episode Reward: 8  | Average Reward 8.96  | Actor loss: 0.13 | Critic loss: 7.60 | Entropy loss: -0.0025  | Total Loss: 7.73 | Total Steps: 43
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 39/100  | Episode Reward: 5  | Average Reward 8.91  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0004  | Total Loss: 0.03 | Total Steps: 42
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode Reward: 10  | Average Reward 8.91  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0031  | Total Loss: 0.01 | Total Steps: 39
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 10  | Ave

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 68/100  | Episode Reward: 10  | Average Reward 8.87  | Actor loss: 0.11 | Critic loss: 0.23 | Entropy loss: -0.0018  | Total Loss: 0.33 | Total Steps: 36
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 69/100  | Episode Reward: 8  | Average Reward 8.84  | Actor loss: 0.00 | Critic loss: 0.38 | Entropy loss: -0.0141  | Total Loss: 0.37 | Total Steps: 43
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 10  | Average Reward 8.89  | Actor loss: 0.00 | Critic loss: 0.17 | Entropy loss: -0.0012  | Total Loss: 0.17 | Total Steps: 31
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 10  | Average Reward 8.89  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 10  | Average Reward 8.84  | Actor loss: 0.00 | Critic loss: 0.02 | Entropy loss: -0.0004  | Total Loss: 0.02 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 10  | Average Reward 8.84  | Actor loss: 0.00 | Critic loss: 0.23 | Entropy loss: -0.0016  | Total Loss: 0.22 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 100/100  | Episode Reward: 10  | Average Reward 8.84  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0106  | Total Loss: 0.04 | Total Steps: 69

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 302/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.02 | Critic loss: 0.14 | Entropy loss: -0.0003  | Total Loss: 0.15 | Total 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 331/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: 0.02 | Critic loss: 0.56 | Entropy loss: -0.0003  | Total Loss: 0.58 | Total Steps: 31

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 332/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: -0.05 | Critic loss: 0.39 | Entropy loss: -0.0013  | Total Loss: 0.35 | Total Steps: 32

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 333/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: 0.03 | Critic loss: 0.40 | Entropy loss: -0.0004  | Total Loss: 0.43 | Total Steps: 41

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 334/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---black cylinder---
Agen

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 363/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: 0.02 | Critic loss: 0.47 | Entropy loss: -0.0005  | Total Loss: 0.50 | Total Steps: 42

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 364/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: 0.01 | Critic loss: 0.18 | Entropy loss: -0.0001  | Total Loss: 0.18 | Total Steps: 36

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 365/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: 0.01 | Critic loss: 0.08 | Entropy loss: -0.0002  | Total Loss: 0.09 | Total Steps: 36

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 366/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow cube---
Agent in t


---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 397/68000  | Episode Reward: 10  | Average Reward 9.76  | Actor loss: 0.00 | Critic loss: 0.45 | Entropy loss: -0.0000  | Total Loss: 0.45 | Total Steps: 31

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 398/68000  | Episode Reward: 10  | Average Reward 9.76  | Actor loss: 0.00 | Critic loss: 0.19 | Entropy loss: -0.0002  | Total Loss: 0.19 | Total Steps: 37

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 399/68000  | Episode Reward: 10  | Average Reward 9.76  | Actor loss: 0.00 | Critic loss: 0.26 | Entropy loss: -0.0001  | Total Loss: 0.26 | Total Steps: 36

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 400/68000  | Episode Reward: 10  | Average Reward 9.76  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

--

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 26/100  | Episode Reward: 10  | Average Reward 8.68  | Actor loss: 0.25 | Critic loss: 6.06 | Entropy loss: -0.0143  | Total Loss: 6.30 | Total Steps: 37
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 27/100  | Episode Reward: 10  | Average Reward 8.68  | Actor loss: 0.00 | Critic loss: 0.24 | Entropy loss: -0.0019  | Total Loss: 0.23 | Total Steps: 31
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 28/100  | Episode Reward: 10  | Average Reward 8.70  | Actor loss: 0.00 | Critic loss: 1.92 | Entropy loss: -0.0008  | Total Loss: 1.93 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 10  | Average Reward 8.72  | Actor loss: 0.00 | Critic loss: 0.02 | Entropy loss: -0.0013  | Total Loss: 0

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 55/100  | Episode Reward: 8  | Average Reward 8.57  | Actor loss: 0.00 | Critic loss: 0.21 | Entropy loss: -0.0154  | Total Loss: 0.19 | Total Steps: 47
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: 8  | Average Reward 8.62  | Actor loss: 0.00 | Critic loss: 0.59 | Entropy loss: -0.0093  | Total Loss: 0.58 | Total Steps: 37
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 57/100  | Episode Reward: 10  | Average Reward 8.62  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0053  | Total Loss: 0.06 | Total Steps: 37
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 58/100  | Episode Reward: 10  | Average Reward 8.65  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 84/100  | Episode Reward: 10  | Average Reward 8.64  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0011  | Total Loss: 0.01 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 85/100  | Episode Reward: 10  | Average Reward 8.64  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0002  | Total Loss: 0.00 | Total Steps: 6
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 86/100  | Episode Reward: 10  | Average Reward 8.64  | Actor loss: 0.00 | Critic loss: 2.05 | Entropy loss: -0.0004  | Total Loss: 2.05 | Total Steps: 6
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 87/100  | Episode Reward: 5  | Average Reward 8.59  | Actor loss: -0.00

Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 418/68000  | Episode Reward: 8  | Average Reward 9.71  | Actor loss: -0.01 | Critic loss: 2.47 | Entropy loss: -0.0001  | Total Loss: 2.45 | Total Steps: 38

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 419/68000  | Episode Reward: 10  | Average Reward 9.71  | Actor loss: -0.00 | Critic loss: 0.58 | Entropy loss: -0.0000  | Total Loss: 0.58 | Total Steps: 31

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 420/68000  | Episode Reward: 10  | Average Reward 9.71  | Actor loss: 0.03 | Critic loss: 0.46 | Entropy loss: -0.0006  | Total Loss: 0.49 | Total Steps: 42

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 421/68000  | Episode Reward: 10  | Average Reward 9.71  | Actor loss: 0.00 | Critic loss: 0.13 | Entropy loss: -0.0001  | Total Loss: 0.13 | Total Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 451/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: 0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 452/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: -0.00 | Critic loss: 0.04 | Entropy loss: -0.0000  | Total Loss: 0.04 | Total Steps: 6

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 453/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: 0.00 | Critic loss: 0.12 | Entropy loss: -0.0001  | Total Loss: 0.12 | Total Steps: 36

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 454/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: 0.11 | Critic loss: 1.11 | Entropy loss: -0.0010  | Total Loss: 1.22 | Total Steps: 42

---green capsule---
Agent in

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 484/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: 0.00 | Critic loss: 0.58 | Entropy loss: -0.0000  | Total Loss: 0.58 | Total Steps: 31

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 485/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: 0.09 | Critic loss: 0.29 | Entropy loss: -0.0024  | Total Loss: 0.37 | Total Steps: 38

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 486/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 487/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---red prism---
Deci

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 15/100  | Episode Reward: 8  | Average Reward 8.54  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0058  | Total Loss: 0.06 | Total Steps: 46
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 16/100  | Episode Reward: 10  | Average Reward 8.54  | Actor loss: 0.02 | Critic loss: 3.70 | Entropy loss: -0.0011  | Total Loss: 3.72 | Total Steps: 31
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 17/100  | Episode Reward: 10  | Average Reward 8.54  | Actor loss: 10.85 | Critic loss: 31.01 | Entropy loss: -0.0319  | Total Loss: 41.83 | Total Steps: 11
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 10  | Average Reward 8.54  | Actor loss: -0.00 | Critic loss: 0.01 | E

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 10  | Average Reward 8.08  | Actor loss: 0.28 | Critic loss: 0.62 | Entropy loss: -0.0027  | Total Loss: 0.90 | Total Steps: 36
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 200
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 300
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 400
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 44/100  | Episode Reward: -48  | Average Reward 7.53  | Actor loss: -0.00 | Critic loss: 94.69 | Entropy loss: -0.0

TEST: Decision Step reward: -2.5
TEST: Step: 200
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 300
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 68/100  | Episode Reward: -15  | Average Reward 7.08  | Actor loss: 0.01 | Critic loss: 2.96 | Entropy loss: -0.0168  | Total Loss: 2.95 | Total Steps: 371
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 69/100  | Episode Reward: 8  | Average Reward 7.05  | Actor loss: 0.00 | Critic loss: 0.15 | Entropy loss: -0.0035  | Total Loss: 0.14 | Total Steps: 53
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 10  | Average Reward 7.05  | Actor loss: 0.01 | Critic loss: 0.27 | Entropy loss: -0.0008  | Total Loss

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 96/100  | Episode Reward: 10  | Average Reward 6.96  | Actor loss: 0.02 | Critic loss: 3.68 | Entropy loss: -0.0020  | Total Loss: 3.69 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 97/100  | Episode Reward: 10  | Average Reward 6.96  | Actor loss: 0.00 | Critic loss: 0.32 | Entropy loss: -0.0011  | Total Loss: 0.32 | Total Steps: 31
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 10  | Average Reward 6.96  | Actor loss: 0.01 | Critic loss: 2.86 | Entropy loss: -0.0007  | Total Loss: 2.86 | Total Steps: 31
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 529/68000  | Episode Reward: 10  | Average Reward 9.60  | Actor loss: 0.00 | Critic loss: 0.11 | Entropy loss: -0.0001  | Total Loss: 0.11 | Total Steps: 37

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 530/68000  | Episode Reward: 10  | Average Reward 9.60  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 531/68000  | Episode Reward: 10  | Average Reward 9.60  | Actor loss: -0.12 | Critic loss: 0.75 | Entropy loss: -0.0014  | Total Loss: 0.63 | Total Steps: 47

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 532/68000  | Episode Reward: 10  | Average Reward 9.60  | Actor loss: 0.07 | Critic loss: 0.50 | Entropy loss: -0.0034  | Total Loss: 0.57 | Total Steps: 37

---blue capsule---
Agent 


---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 563/68000  | Episode Reward: 10  | Average Reward 9.55  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 564/68000  | Episode Reward: 10  | Average Reward 9.55  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0001  | Total Loss: 0.08 | Total Steps: 36

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 565/68000  | Episode Reward: 10  | Average Reward 9.55  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 566/68000  | Episode Reward: 10  | Average Reward 9.55  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 596/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.44 | Entropy loss: -0.0001  | Total Loss: 0.44 | Total Steps: 31

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 597/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 598/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: -0.00 | Critic loss: 0.44 | Entropy loss: -0.0001  | Total Loss: 0.44 | Total Steps: 31

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 599/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.15 | Entropy loss: -0.0000  | Total Loss: 0.15 | Total Steps: 36

---red prism---
Agent in t

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 24/100  | Episode Reward: 10  | Average Reward 6.58  | Actor loss: 0.00 | Critic loss: 0.76 | Entropy loss: -0.0101  | Total Loss: 0.75 | Total Steps: 67
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 25/100  | Episode Reward: 10  | Average Reward 6.58  | Actor loss: 0.00 | Critic loss: 1.98 | Entropy loss: -0.0070  | Total Loss: 1.98 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 26/100  | Episode Reward: 5  | Average Reward 6.55  | Actor loss: 0.53 | Critic loss: 6.95 | Entropy loss: -0.0071  | Total Loss: 7.48 | Total Steps: 59
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 27/100  | Episode Reward: 10  | Average Reward 6.58  | Actor loss: 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: 10  | Average Reward 7.72  | Actor loss: 0.00 | Critic loss: 0.49 | Entropy loss: -0.0084  | Total Loss: 0.48 | Total Steps: 42
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 57/100  | Episode Reward: 10  | Average Reward 7.72  | Actor loss: 0.02 | Critic loss: 0.15 | Entropy loss: -0.0009  | Total Loss: 0.18 | Total Steps: 36
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 58/100  | Episode Reward: 5  | Average Reward 7.67  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0016  | Total Loss: 0.00 | Total Steps: 46
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 59/100  | Episode Reward: 8  | Av

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 86/100  | Episode Reward: 8  | Average Reward 8.47  | Actor loss: 0.00 | Critic loss: 0.04 | Entropy loss: -0.0052  | Total Loss: 0.03 | Total Steps: 47
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 87/100  | Episode Reward: 10  | Average Reward 8.47  | Actor loss: 0.00 | Critic loss: 0.86 | Entropy loss: -0.0013  | Total Loss: 0.86 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 88/100  | Episode Reward: 10  | Average Reward 8.57  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0017  | Total Loss: 0.08 | Total Steps: 31
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 89/100  | Episode Reward: 10  | Average Reward 8.57  | Actor loss: -0.00 | Critic loss: 0.03 | Entrop

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 618/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.00 | Critic loss: 0.74 | Entropy loss: -0.0001  | Total Loss: 0.74 | Total Steps: 31

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 619/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.01 | Critic loss: 0.28 | Entropy loss: -0.0004  | Total Loss: 0.27 | Total Steps: 41

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 620/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 621/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.01 | Critic loss: 0.23 | Entropy loss: -0.0001  | Total Loss: 0.24 | Total Steps: 36

---blue cube---
Agent in termina

Training  | Episode: 651/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 652/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.08 | Critic loss: 0.71 | Entropy loss: -0.0014  | Total Loss: 0.79 | Total Steps: 31

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 653/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.64 | Entropy loss: -0.0001  | Total Loss: 0.64 | Total Steps: 31

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 654/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.55 | Entropy loss: -0.0001  | Total Loss: 0.55 | Total Steps: 31

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
-----The 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 684/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: 0.00 | Critic loss: 0.17 | Entropy loss: -0.0001  | Total Loss: 0.18 | Total Steps: 36

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 685/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: 0.02 | Critic loss: 0.27 | Entropy loss: -0.0003  | Total Loss: 0.29 | Total Steps: 41

---red prism---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 686/68000  | Episode Reward: 8  | Average Reward 9.85  | Actor loss: -0.26 | Critic loss: 1.96 | Entropy loss: -0.0015  | Total Loss: 1.69 | Total Steps: 43

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 687/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

--

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 12/100  | Episode Reward: 8  | Average Reward 8.05  | Actor loss: 0.00 | Critic loss: 0.18 | Entropy loss: -0.0132  | Total Loss: 0.17 | Total Steps: 41
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 13/100  | Episode Reward: 10  | Average Reward 8.05  | Actor loss: 0.02 | Critic loss: 4.93 | Entropy loss: -0.0635  | Total Loss: 4.88 | Total Steps: 8
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 14/100  | Episode Reward: 8  | Average Reward 8.30  | Actor loss: 0.00 | Critic loss: 0.70 | Entropy loss: -0.0031  | Total Loss: 0.70 | Total Steps: 34
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 15/100  | Episode Reward: 10  | Average Reward 8.32  | Actor loss: 0.01 | Critic loss: 2.92 | Entropy l

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 42/100  | Episode Reward: 8  | Average Reward 8.18  | Actor loss: 0.00 | Critic loss: 0.17 | Entropy loss: -0.0057  | Total Loss: 0.16 | Total Steps: 42
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 10  | Average Reward 8.18  | Actor loss: 0.00 | Critic loss: 0.02 | Entropy loss: -0.0010  | Total Loss: 0.02 | Total Steps: 6
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 44/100  | Episode Reward: 10  | Average Reward 8.18  | Actor loss: 4.96 | Critic loss: 10.30 | Entropy loss: -0.0088  | Total Loss: 15.25 | Total Steps: 58
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 45/100  | Episode Reward: 10  | Average Reward 8.20  | Actor loss: 0.07 | Critic loss: 14.43 | Entr

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 10  | Average Reward 8.10  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0063  | Total Loss: 0.06 | Total Steps: 31
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 73/100  | Episode Reward: 10  | Average Reward 8.10  | Actor loss: 1.25 | Critic loss: 0.80 | Entropy loss: -0.0045  | Total Loss: 2.04 | Total Steps: 36
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 74/100  | Episode Reward: 10  | Average Reward 8.12  | Actor loss: 0.00 | Critic loss: 0.71 | Entropy loss: -0.0005  | Total Loss: 0.71 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 75/100  | Episode Reward: 10  | Average Reward 8.22  | Actor loss: 0.37 | Critic loss: 1.45 | Entropy loss: -0.0070  | Total Loss:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 703/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.50 | Critic loss: 3.45 | Entropy loss: -0.0057  | Total Loss: 2.95 | Total Steps: 65

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 704/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.04 | Entropy loss: -0.0001  | Total Loss: 0.04 | Total Steps: 36

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 705/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.34 | Entropy loss: -0.0000  | Total Loss: 0.34 | Total Steps: 6

---yellow cylinder---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 706/68000  | Episode Reward: 8  | Average Reward 9.78  | Actor loss: -0.24 | Critic loss: 2.67 | Entropy loss: -0.0011  | Total Loss: 2.43 | Total Steps

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 737/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.23 | Entropy loss: -0.0000  | Total Loss: 0.23 | Total Steps: 36

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 738/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.02 | Critic loss: 0.19 | Entropy loss: -0.0003  | Total Loss: 0.21 | Total Steps: 41

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 739/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.51 | Critic loss: 5.44 | Entropy loss: -0.0021  | Total Loss: 4.93 | Total Steps: 59

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 740/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---blue sphere---
Age

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 770/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.01 | Critic loss: 0.04 | Entropy loss: -0.0002  | Total Loss: 0.04 | Total Steps: 36

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 771/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 772/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 773/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.06 | Critic loss: 0.25 | Entropy loss: -0.0030  | Total Loss: 0.19 | Total Steps: 43

---black cylinder--

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 2/100  | Episode Reward: 8  | Average Reward 8.24  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0005  | Total Loss: 0.08 | Total Steps: 29
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 3/100  | Episode Reward: 10  | Average Reward 8.27  | Actor loss: 0.00 | Critic loss: 1.74 | Entropy loss: -0.0006  | Total Loss: 1.74 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 4/100  | Episode Reward: 10  | Average Reward 8.27  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0002  | Total Loss: 0.00 | Total Steps: 6
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 5/100  | Episode Reward: 10  | Average Reward 8.27  | Actor loss: 0.00 | Critic loss: 1.84 | Entropy loss: -0.0024  | Total Loss: 1.84 |

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 33/100  | Episode Reward: 8  | Average Reward 8.79  | Actor loss: 0.48 | Critic loss: 8.95 | Entropy loss: -0.0164  | Total Loss: 9.41 | Total Steps: 57
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 34/100  | Episode Reward: 10  | Average Reward 8.81  | Actor loss: 0.01 | Critic loss: 2.81 | Entropy loss: -0.0020  | Total Loss: 2.81 | Total Steps: 36
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 35/100  | Episode Reward: 8  | Average Reward 8.79  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0004  | Total Loss: 0.08 | Total Steps: 29
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 36/100  | Episode Reward: 10  | Average Reward 8.79  | Actor loss: 

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 62/100  | Episode Reward: 8  | Average Reward 8.62  | Actor loss: 0.02 | Critic loss: 3.39 | Entropy loss: -0.0092  | Total Loss: 3.40 | Total Steps: 87
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 63/100  | Episode Reward: 8  | Average Reward 8.59  | Actor loss: 0.01 | Critic loss: 0.21 | Entropy loss: -0.0068  | Total Loss: 0.21 | Total Steps: 34
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 64/100  | Episode Reward: 8  | Average Reward 8.56  | Actor loss: 0.00 | Critic loss: 0.09 | Entropy loss: -0.0004  | Total Loss: 0.09 | Total Steps: 29
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 65/100  | Episode Reward: 10  | Avera

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 92/100  | Episode Reward: 10  | Average Reward 8.27  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0012  | Total Loss: 0.01 | Total Steps: 6
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 93/100  | Episode Reward: 8  | Average Reward 8.29  | Actor loss: 0.00 | Critic loss: 0.58 | Entropy loss: -0.0064  | Total Loss: 0.58 | Total Steps: 39
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 94/100  | Episode Reward: 8  | Average Reward 8.32  | Actor loss: 2.54 | Critic loss: 10.44 | Entropy loss: -0.0161  | Total Loss: 12.97 | Total Steps: 80
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 10  | Average Reward 8.43  | Actor los

Training  | Episode: 825/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 826/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---black prism---
Decision Step reward: -2.5
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 827/68000  | Episode Reward: 5  | Average Reward 9.80  | Actor loss: -1.31 | Critic loss: 9.27 | Entropy loss: -0.0081  | Total Loss: 7.95 | Total Steps: 81

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 828/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---red sphere---
Agent 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 858/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: 0.10 | Critic loss: 0.85 | Entropy loss: -0.0018  | Total Loss: 0.94 | Total Steps: 30

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 859/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: 0.00 | Critic loss: 0.30 | Entropy loss: -0.0024  | Total Loss: 0.30 | Total Steps: 42

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 860/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 861/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: 0.00 | Critic loss: 0.45 | Entropy loss: -0.0001  | Total Loss: 0.46 | Total Steps: 31

---blue capsule---
Agent


---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 892/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.10 | Entropy loss: -0.0000  | Total Loss: 0.10 | Total Steps: 6

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 893/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 894/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.01 | Critic loss: 0.11 | Entropy loss: -0.0004  | Total Loss: 0.12 | Total Steps: 36

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 895/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---blue p

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 22/100  | Episode Reward: 10  | Average Reward 8.39  | Actor loss: 0.26 | Critic loss: 6.16 | Entropy loss: -0.0080  | Total Loss: 6.42 | Total Steps: 42
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 23/100  | Episode Reward: 10  | Average Reward 8.44  | Actor loss: 0.00 | Critic loss: 0.18 | Entropy loss: -0.0014  | Total Loss: 0.18 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 24/100  | Episode Reward: 8  | Average Reward 8.41  | Actor loss: 0.74 | Critic loss: 6.60 | Entropy loss: -0.0073  | Total Loss: 7.33 | Total Steps: 70
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 25/100  | Episode Reward: 10  | Average Reward 8.41  | Actor loss: 0.00 | Critic loss: 0.07 | Entro

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 53/100  | Episode Reward: 10  | Average Reward 8.69  | Actor loss: -0.00 | Critic loss: 0.15 | Entropy loss: -0.0043  | Total Loss: 0.15 | Total Steps: 41
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 54/100  | Episode Reward: 10  | Average Reward 8.69  | Actor loss: 0.01 | Critic loss: 2.35 | Entropy loss: -0.0010  | Total Loss: 2.36 | Total Steps: 36
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 55/100  | Episode Reward: 8  | Average Reward 8.69  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0014  | Total Loss: -0.00 | Total Steps: 29
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: 10  | Average Reward 8.69  | Actor loss: 0.23 | Critic loss: 0.22 | Entr

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 83/100  | Episode Reward: 5  | Average Reward 8.84  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0060  | Total Loss: -0.00 | Total Steps: 42
TEST: ---yellow prism---
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 84/100  | Episode Reward: 9  | Average Reward 8.83  | Actor loss: 0.03 | Critic loss: 2.54 | Entropy loss: -0.0345  | Total Loss: 2.54 | Total Steps: 33
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 85/100  | Episode Reward: 5  | Average Reward 8.78  | Actor loss: 0.00 | Critic loss: 0.10 | Entropy loss: -0.0036  | Total Loss: 0.09 | Total Steps: 47
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step rewa


---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 915/68000  | Episode Reward: 10  | Average Reward 9.90  | Actor loss: -0.00 | Critic loss: 0.32 | Entropy loss: -0.0000  | Total Loss: 0.32 | Total Steps: 31

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 916/68000  | Episode Reward: 10  | Average Reward 9.90  | Actor loss: -0.01 | Critic loss: 0.78 | Entropy loss: -0.0002  | Total Loss: 0.77 | Total Steps: 30

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 917/68000  | Episode Reward: 10  | Average Reward 9.90  | Actor loss: -0.00 | Critic loss: 0.23 | Entropy loss: -0.0002  | Total Loss: 0.23 | Total Steps: 31

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 918/68000  | Episode Reward: 10  | Average Reward 9.90  | Actor loss: -0.00 | Critic loss: 0.40 | Entropy loss: -0.0000  | Total Loss: 0.40 | Total Steps: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 947/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: 0.00 | Critic loss: 0.09 | Entropy loss: -0.0001  | Total Loss: 0.10 | Total Steps: 36

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 948/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: -0.00 | Critic loss: 0.10 | Entropy loss: -0.0001  | Total Loss: 0.10 | Total Steps: 37

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 949/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: -0.00 | Critic loss: 0.36 | Entropy loss: -0.0000  | Total Loss: 0.36 | Total Steps: 31

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 950/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: 0.00 | Critic loss: 0.09 | Entropy loss: -0.0000  | Total Loss: 0.09 | Total Steps: 36

---blue prism---
Agen

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 980/68000  | Episode Reward: 10  | Average Reward 9.90  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow capsule---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 981/68000  | Episode Reward: 8  | Average Reward 9.88  | Actor loss: -0.16 | Critic loss: 2.18 | Entropy loss: -0.0015  | Total Loss: 2.02 | Total Steps: 43

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 982/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: -0.00 | Critic loss: 0.55 | Entropy loss: -0.0006  | Total Loss: 0.55 | Total Steps: 32

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 983/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 10/100  | Episode Reward: 8  | Average Reward 8.87  | Actor loss: 0.00 | Critic loss: 0.76 | Entropy loss: -0.0018  | Total Loss: 0.76 | Total Steps: 37
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 11/100  | Episode Reward: 10  | Average Reward 8.87  | Actor loss: 0.02 | Critic loss: 10.12 | Entropy loss: -0.0047  | Total Loss: 10.13 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 12/100  | Episode Reward: 10  | Average Reward 8.87  | Actor loss: 0.05 | Critic loss: 0.91 | Entropy loss: -0.0006  | Total Loss: 0.96 | Total Steps: 36
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 13/100  | Episode Reward: 10  | Average Reward 8.87  | Actor loss: 0.04 | Critic loss: 1.02 | Entropy loss: -0.0030  | Total Loss: 1.0

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode Reward: 10  | Average Reward 8.94  | Actor loss: 0.00 | Critic loss: 1.34 | Entropy loss: -0.0005  | Total Loss: 1.34 | Total Steps: 31
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 10  | Average Reward 8.96  | Actor loss: 0.00 | Critic loss: 3.01 | Entropy loss: -0.0086  | Total Loss: 3.00 | Total Steps: 37
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 42/100  | Episode Reward: 10  | Average Reward 8.96  | Actor loss: 0.01 | Critic loss: 2.60 | Entropy loss: -0.0013  | Total Loss: 2.61 | Total Steps: 36
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 10  | Average Reward 8.96  | Actor loss: 0.00 | Critic loss: 0.74 | Entropy loss: -0.0005  | Total Loss: 

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 8  | Average Reward 9.04  | Actor loss: 0.01 | Critic loss: 2.68 | Entropy loss: -0.0074  | Total Loss: 2.69 | Total Steps: 47
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 10  | Average Reward 9.04  | Actor loss: 0.00 | Critic loss: 0.60 | Entropy loss: -0.0043  | Total Loss: 0.60 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 73/100  | Episode Reward: 10  | Average Reward 9.04  | Actor loss: 0.02 | Critic loss: 5.19 | Entropy loss: -0.0057  | Total Loss: 5.20 | Total Steps: 31
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 74/100  | Episode Reward: 10  | Average Reward 9.04  | Actor loss: 0.07 | Critic loss: 1.55 | Entropy l

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1003/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: 0.17 | Critic loss: 1.50 | Entropy loss: -0.0009  | Total Loss: 1.67 | Total Steps: 33

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1004/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: -0.01 | Critic loss: 2.85 | Entropy loss: -0.0004  | Total Loss: 2.84 | Total Steps: 55

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1005/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: -0.01 | Critic loss: 0.20 | Entropy loss: -0.0006  | Total Loss: 0.19 | Total Steps: 41

---yellow cylinder---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1006/68000  | Episode Reward: 8  | Average Reward 9.66  | Actor loss: -0.61 | Critic loss: 5.95 | Entropy loss: -0.0054  | Total Loss: 5.33 | Total

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1036/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: 0.00 | Critic loss: 0.51 | Entropy loss: -0.0001  | Total Loss: 0.52 | Total Steps: 31

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1037/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---black cube---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1038/68000  | Episode Reward: 8  | Average Reward 9.66  | Actor loss: -0.09 | Critic loss: 2.75 | Entropy loss: -0.0007  | Total Loss: 2.66 | Total Steps: 43

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1039/68000  | Episode Reward: 10  | Average Reward 9.66  | Actor loss: 0.00 | Critic loss: 0.06 | Entropy loss: -0.0001  | Total Loss: 0.06 | Total Steps

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1069/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.02 | Critic loss: 0.37 | Entropy loss: -0.0013  | Total Loss: 0.39 | Total Steps: 42

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1070/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: -0.11 | Critic loss: 1.85 | Entropy loss: -0.0015  | Total Loss: 1.74 | Total Steps: 71

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1071/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1072/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.01 | Critic loss: 0.71 | Entropy loss: -0.0002  | Total Loss: 0.71 | Total Steps: 31

---green prism---
Agent 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 1/100  | Episode Reward: 10  | Average Reward 9.20  | Actor loss: 0.00 | Critic loss: 0.04 | Entropy loss: -0.0027  | Total Loss: 0.04 | Total Steps: 41
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 2/100  | Episode Reward: 5  | Average Reward 9.15  | Actor loss: 0.00 | Critic loss: 0.13 | Entropy loss: -0.0101  | Total Loss: 0.12 | Total Steps: 58
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 3/100  | Episode Reward: 10  | Average Reward 9.28  | Actor loss: 0.01 | Critic loss: 2.95 | Entropy loss: -0.0049  | Total Loss: 2.96 | Total Steps: 6
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 4/100  | Episode Reward: 10  | Average Reward 9.28  | Actor loss: 0.00 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 32/100  | Episode Reward: 10  | Average Reward 9.12  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0001  | Total Loss: 0.00 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 33/100  | Episode Reward: 8  | Average Reward 9.12  | Actor loss: 0.20 | Critic loss: 4.80 | Entropy loss: -0.0095  | Total Loss: 4.99 | Total Steps: 69
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 34/100  | Episode Reward: 10  | Average Reward 9.12  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0016  | Total Loss: -0.00 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 35/100  | Episode Reward: 10  | Average Reward 9.15  | Actor loss: 0.00 | Critic loss: 0.32 | Entr

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 63/100  | Episode Reward: 10  | Average Reward 9.18  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0003  | Total Loss: 0.00 | Total Steps: 31
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 64/100  | Episode Reward: 10  | Average Reward 9.18  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0004  | Total Loss: 0.01 | Total Steps: 31
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 65/100  | Episode Reward: 10  | Average Reward 9.18  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0009  | Total Loss: 0.01 | Total Steps: 36
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 66/100  | Episode Reward: 10  | Average Reward 9.18  | Actor loss: 0.00 | Critic loss: 0.73 | Entropy loss: -0.0006  | Total Loss: 0

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 93/100  | Episode Reward: -2  | Average Reward 8.85  | Actor loss: 0.51 | Critic loss: 6.51 | Entropy loss: -0.0091  | Total Loss: 7.01 | Total Steps: 191
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 94/100  | Episode Reward: 10  | Average Reward 8.85  | Actor loss: 0.00 | Critic loss: 0.43 | Entropy loss: -0.0005  | Total Loss: 0.43 | Total Steps: 31
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 8  | Average Reward 8.82  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0026  | Total Loss: 0.04 | Total Steps: 30
TEST: ---black sphere---
T

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1127/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0000  | Total Loss: 0.08 | Total Steps: 36

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1128/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.04 | Critic loss: 0.96 | Entropy loss: -0.0005  | Total Loss: 0.91 | Total Steps: 47

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1129/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.39 | Critic loss: 0.65 | Entropy loss: -0.0036  | Total Loss: 1.03 | Total Steps: 39

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1130/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.32 | Entropy loss: -0.0001  | Total Loss: 0.32 | Total Steps: 31

---green sphere---
Agen

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1160/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: 0.00 | Critic loss: 0.14 | Entropy loss: -0.0003  | Total Loss: 0.14 | Total Steps: 39

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1161/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: 0.00 | Critic loss: 0.30 | Entropy loss: -0.0003  | Total Loss: 0.30 | Total Steps: 41

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1162/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1163/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0000  | Total Loss: 0.07 | Total Steps: 6

---red cube---
Agent

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1193/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.27 | Entropy loss: -0.0002  | Total Loss: 0.27 | Total Steps: 41

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1194/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.09 | Entropy loss: -0.0001  | Total Loss: 0.09 | Total Steps: 36

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1195/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.44 | Entropy loss: -0.0000  | Total Loss: 0.44 | Total Steps: 31

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1196/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.32 | Entropy loss: -0.0001  | Total Loss: 0.32 | Total Steps: 41

---yellow cyli

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 23/100  | Episode Reward: 2  | Average Reward 8.88  | Actor loss: 1.50 | Critic loss: 7.26 | Entropy loss: -0.0161  | Total Loss: 8.75 | Total Steps: 128
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 24/100  | Episode Reward: 8  | Average Reward 8.88  | Actor loss: 0.00 | Critic loss: 0.36 | Entropy loss: -0.0016  | Total Loss: 0.36 | Total Steps: 47
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 25/100  | Episode Reward: 10  | Average Reward 8.88  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0007  | Total Loss: 0.01 | Total Steps: 36
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 52/100  | Episode Reward: 5  | Average Reward 8.68  | Actor loss: 0.01 | Critic loss: 1.17 | Entropy loss: -0.0062  | Total Loss: 1.18 | Total Steps: 46
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 53/100  | Episode Reward: 10  | Average Reward 8.68  | Actor loss: 0.00 | Critic loss: 0.03 | Entropy loss: -0.0011  | Total Loss: 0.03 | Total Steps: 6
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 54/100  | Episode Reward: 8  | Average Reward 8.65  | Actor loss: 0.00 | Critic loss: 0.09 | Entropy loss: -0.0060  | Total Loss: 0.09 | Total Steps: 47
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 55/100  | Episode Reward: 8  | Average Reward 8.62  | Actor loss: 6.

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 10  | Average Reward 8.15  | Actor loss: 0.98 | Critic loss: 9.40 | Entropy loss: -0.0122  | Total Loss: 10.37 | Total Steps: 70
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 81/100  | Episode Reward: 10  | Average Reward 8.15  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0012  | Total Loss: 0.07 | Total Steps: 36
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 82/100  | Episode Reward: 2  | Average Reward 8.07  | Actor loss: 0.00 | Critic loss: 0.27 | Entropy loss: -0.0126  | Total Loss: 0.25 | Total Steps: 51
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 83/100  | Episode Reward: 10  | Av

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1212/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1213/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1214/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.07 | Critic loss: 0.16 | Entropy loss: -0.0019  | Total Loss: 0.23 | Total Steps: 41

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1215/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.67 | Entropy loss: -0.0001  | Total Loss: 0.67 | Total Steps: 31

---black prism---
A

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1245/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0000  | Total Loss: 0.07 | Total Steps: 6

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1246/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.13 | Entropy loss: -0.0001  | Total Loss: 0.13 | Total Steps: 37

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1247/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.22 | Critic loss: 1.11 | Entropy loss: -0.0039  | Total Loss: 0.89 | Total Steps: 45

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1248/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---yellow sphere---


Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1278/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.01 | Critic loss: 0.12 | Entropy loss: -0.0018  | Total Loss: 0.13 | Total Steps: 37

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1279/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1280/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1281/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0001  | Total Loss: 0.06 | Total Steps: 37

---yellow cube---
Ag

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 9/100  | Episode Reward: 8  | Average Reward 8.30  | Actor loss: 0.00 | Critic loss: 0.38 | Entropy loss: -0.0275  | Total Loss: 0.36 | Total Steps: 38
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 10/100  | Episode Reward: 10  | Average Reward 8.32  | Actor loss: 0.12 | Critic loss: 12.12 | Entropy loss: -0.0256  | Total Loss: 12.21 | Total Steps: 67
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 11/100  | Episode Reward: 8  | Average Reward 8.32  | Actor loss: 0.00 | Critic loss: 0.21 | Entropy loss: -0.0065  | Total Loss: 0.20 | Total Steps: 47
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 12/100  | Episode Reward: 10  | Average Reward 8.32  | Actor loss:

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode Reward: 10  | Average Reward 8.32  | Actor loss: 0.64 | Critic loss: 2.57 | Entropy loss: -0.0157  | Total Loss: 3.20 | Total Steps: 30
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 10  | Average Reward 8.32  | Actor loss: 0.00 | Critic loss: 0.58 | Entropy loss: -0.0020  | Total Loss: 0.57 | Total Steps: 31
TEST: ---blue cylinder---
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 42/100  | Episode Reward: 9  | Average Reward 8.34  | Actor loss: 0.01 | Critic loss: 4.27 | Entropy loss: -0.0348  | Total Loss: 4.24 | Total Steps: 32
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 10  | Average Reward 8.34  | Actor loss: 0.01 | Critic loss: 0.25 | Entr

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 69/100  | Episode Reward: 10  | Average Reward 8.59  | Actor loss: 0.02 | Critic loss: 3.45 | Entropy loss: -0.0065  | Total Loss: 3.47 | Total Steps: 37
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 5  | Average Reward 8.54  | Actor loss: 0.00 | Critic loss: 0.34 | Entropy loss: -0.0034  | Total Loss: 0.34 | Total Steps: 46
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 10  | Average Reward 8.64  | Actor loss: 0.01 | Critic loss: 0.42 | Entropy loss: -0.0099  | Total Loss: 0.41 | Total Steps: 40
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 10  | Average Reward 8.69  | Actor loss: 0.00

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 100/100  | Episode Reward: 10  | Average Reward 8.79  | Actor loss: 0.00 | Critic loss: 2.01 | Entropy loss: -0.0013  | Total Loss: 2.02 | Total Steps: 31

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1302/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.11 | Entropy loss: -0.0000  | Total Loss: 0.11 | Total Steps: 6

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1303/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1304/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 4.39 | Entropy loss: -0.0036  | Total Loss: 4.38 | Total Steps: 74

---green pri

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1334/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.02 | Critic loss: 0.35 | Entropy loss: -0.0002  | Total Loss: 0.36 | Total Steps: 41

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1335/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: -0.00 | Critic loss: 0.13 | Entropy loss: -0.0002  | Total Loss: 0.13 | Total Steps: 41

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1336/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.85 | Entropy loss: -0.0001  | Total Loss: 0.85 | Total Steps: 31

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1337/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0001  | Total Loss: 0.03 | Total Steps: 36

---black cube

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1367/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.03 | Critic loss: 0.15 | Entropy loss: -0.0010  | Total Loss: 0.18 | Total Steps: 36

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1368/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.00 | Critic loss: 0.29 | Entropy loss: -0.0000  | Total Loss: 0.29 | Total Steps: 31

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1369/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.00 | Critic loss: 0.26 | Entropy loss: -0.0002  | Total Loss: 0.27 | Total Steps: 41

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1370/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---blue capsule-

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1400/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: 0.04 | Critic loss: 1.22 | Entropy loss: -0.0008  | Total Loss: 1.26 | Total Steps: 30

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1401/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: 0.00 | Critic loss: 0.12 | Entropy loss: -0.0002  | Total Loss: 0.13 | Total Steps: 37
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 1/100  | Episode Reward: 10  | Average Reward 8.79  | Actor loss: 0.01 | Critic loss: 2.26 | Entropy loss: -0.0014  | Total Loss: 2.27 | Total Steps: 31
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 2/100  | Episode Reward: 10  | Average Reward 8.79  | Actor loss: 0.00 | Critic loss: 0.19 | Entropy loss: -0.0132  | Total Loss: 0.18 | Total Steps: 3

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 31/100  | Episode Reward: 8  | Average Reward 8.79  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0026  | Total Loss: 0.00 | Total Steps: 43
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 32/100  | Episode Reward: 10  | Average Reward 8.79  | Actor loss: 0.00 | Critic loss: 0.21 | Entropy loss: -0.0031  | Total Loss: 0.21 | Total Steps: 41
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 33/100  | Episode Reward: 10  | Average Reward 8.79  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0006  | Total Loss: 0.00 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 34/100  | Episode Reward: 10  | Average Reward 8.81  | Actor loss: 0.00 | Critic loss: 0.47 | Entropy

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 62/100  | Episode Reward: 10  | Average Reward 9.07  | Actor loss: 0.01 | Critic loss: 1.70 | Entropy loss: -0.0075  | Total Loss: 1.70 | Total Steps: 37
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 63/100  | Episode Reward: 10  | Average Reward 9.07  | Actor loss: 0.00 | Critic loss: 1.76 | Entropy loss: -0.0023  | Total Loss: 1.76 | Total Steps: 6
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 64/100  | Episode Reward: 8  | Average Reward 9.05  | Actor loss: -0.00 | Critic loss: 0.14 | Entropy loss: -0.0045  | Total Loss: 0.13 | Total Steps: 38
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 65/100  | Episode Reward: 10  | Average Reward 9.05  | Actor loss: 0.01 | Critic loss: 2.26 | Entropy

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 93/100  | Episode Reward: 10  | Average Reward 9.25  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0005  | Total Loss: 0.06 | Total Steps: 36
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 94/100  | Episode Reward: 8  | Average Reward 9.22  | Actor loss: -0.00 | Critic loss: 0.12 | Entropy loss: -0.0002  | Total Loss: 0.12 | Total Steps: 47
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 8  | Average Reward 9.20  | Actor loss: 0.00 | Critic loss: 0.42 | Entropy loss: -0.0040  | Total Loss: 0.42 | Total Steps: 38
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 96/100  | Episode Reward: 10  | Average Reward 9.20  | Actor loss: 0.

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1426/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: 0.01 | Critic loss: 0.12 | Entropy loss: -0.0001  | Total Loss: 0.13 | Total Steps: 36

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1427/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.00 | Critic loss: 0.15 | Entropy loss: -0.0000  | Total Loss: 0.15 | Total Steps: 36

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1428/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1429/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---red cube---
Agent 


---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1460/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: 0.00 | Critic loss: 0.25 | Entropy loss: -0.0000  | Total Loss: 0.25 | Total Steps: 31

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1461/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: -0.00 | Critic loss: 0.04 | Entropy loss: -0.0002  | Total Loss: 0.04 | Total Steps: 36

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1462/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: 0.00 | Critic loss: 0.27 | Entropy loss: -0.0000  | Total Loss: 0.27 | Total Steps: 31

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1463/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: 0.00 | Critic loss: 0.41 | Entropy loss: -0.0002  | Total Loss: 0.42 | Total Steps:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1494/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1495/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: 0.00 | Critic loss: 0.44 | Entropy loss: -0.0001  | Total Loss: 0.44 | Total Steps: 31

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1496/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: -0.00 | Critic loss: 0.35 | Entropy loss: -0.0000  | Total Loss: 0.34 | Total Steps: 31

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1497/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: -0.00 | Critic loss: 0.05 | Entropy loss: -0.0000  | Total Loss: 0.05 | Total Steps: 6

---black cylinder---
Agent

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 24/100  | Episode Reward: 10  | Average Reward 9.12  | Actor loss: 0.00 | Critic loss: 0.06 | Entropy loss: -0.0042  | Total Loss: 0.05 | Total Steps: 37
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 25/100  | Episode Reward: 10  | Average Reward 9.12  | Actor loss: 0.00 | Critic loss: 1.68 | Entropy loss: -0.0012  | Total Loss: 1.68 | Total Steps: 31
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 26/100  | Episode Reward: 10  | Average Reward 9.15  | Actor loss: 0.00 | Critic loss: 0.55 | Entropy loss: -0.0006  | Total Loss: 0.55 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 27/100  | Episode Reward: 10  | Average Reward 9.20  | Actor loss: -0.00 | Critic loss: 0.04 | Entropy loss: -0.0155  | Total Loss: 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 55/100  | Episode Reward: 10  | Average Reward 9.22  | Actor loss: 0.00 | Critic loss: 0.50 | Entropy loss: -0.0007  | Total Loss: 0.50 | Total Steps: 31
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: 8  | Average Reward 9.20  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0030  | Total Loss: 0.02 | Total Steps: 29
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 57/100  | Episode Reward: 5  | Average Reward 9.15  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0068  | Total Loss: 0.01 | Total Steps: 42
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 58/100

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 84/100  | Episode Reward: 8  | Average Reward 8.88  | Actor loss: 0.09 | Critic loss: 5.83 | Entropy loss: -0.0039  | Total Loss: 5.92 | Total Steps: 43
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 85/100  | Episode Reward: -2  | Average Reward 8.76  | Actor loss: 0.29 | Critic loss: 8.16 | Entropy loss: -0.0099  | Total Loss: 8.44 | Total Steps: 156
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 86/100  | Episode Reward: 10  | Average Reward 8.76  | Actor loss: 0.00 | Critic loss: 0.14 | Entropy loss: -0.0017  | Total Loss: 0.14 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TES

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1516/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1517/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0000  | Total Loss: 0.07 | Total Steps: 36

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1518/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.35 | Entropy loss: -0.0000  | Total Loss: 0.35 | Total Steps: 31

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1519/68000  | Episode Reward: 10  | Average Reward 9.90  | Actor loss: -0.01 | Critic loss: 1.30 | Entropy loss: -0.0001  | Total Loss: 1.29 | Total Steps: 47

---yellow cube---


Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1550/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: 0.01 | Critic loss: 0.25 | Entropy loss: -0.0001  | Total Loss: 0.26 | Total Steps: 36

---blue cube---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1551/68000  | Episode Reward: 8  | Average Reward 9.85  | Actor loss: -0.03 | Critic loss: 3.64 | Entropy loss: -0.0002  | Total Loss: 3.60 | Total Steps: 47

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1552/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.01 | Critic loss: 0.15 | Entropy loss: -0.0002  | Total Loss: 0.16 | Total Steps: 36

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1553/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: 0.03 | Critic loss: 0.33 | Entropy loss: -0.0004  | Total Loss: 0.36 | Total 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1583/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.02 | Critic loss: 0.56 | Entropy loss: -0.0004  | Total Loss: 0.54 | Total Steps: 36

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1584/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.11 | Critic loss: 0.04 | Entropy loss: -0.0016  | Total Loss: -0.07 | Total Steps: 7

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1585/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.01 | Critic loss: 0.06 | Entropy loss: -0.0003  | Total Loss: 0.05 | Total Steps: 36

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1586/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.14 | Entropy loss: -0.0000  | Total Loss: 0.14 | Total Steps: 36

---black capsul

TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 13/100  | Episode Reward: 5  | Average Reward 8.55  | Actor loss: 0.22 | Critic loss: 4.40 | Entropy loss: -0.0105  | Total Loss: 4.61 | Total Steps: 196
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 14/100  | Episode Reward: 8  | Average Reward 8.55  | Actor loss: 0.00 | Critic loss: 0.43 | Entropy loss: -0.0021  | Total Loss: 0.43 | Total Steps: 29
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 15/100  | Episode Reward: 10  | Average Reward 8.61  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0004  | Total Loss: 0.00 | Total Steps: 6
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 8  | Average Reward 8.58  | Actor loss: 0.00 | Critic loss: 0.32 | Entropy loss: -0.0184  | Total Loss: 0.31 | Total Steps: 35
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 44/100  | Episode Reward: 10  | Average Reward 8.58  | Actor loss: 0.00 | Critic loss: 0.03 | Entropy loss: -0.0011  | Total Loss: 0.03 | Total Steps: 31
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 45/100  | Episode Reward: 8  | Average Reward 8.55  | Actor loss: 0.00 | Critic loss: 0.19 | Entropy loss: -0.0003  | Total Loss: 0.19 | Total Steps: 47
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 46/100  | Episode Reward: 8  | Average Reward 8.53  | Actor loss: 0

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 73/100  | Episode Reward: 10  | Average Reward 8.64  | Actor loss: 0.01 | Critic loss: 0.11 | Entropy loss: -0.0017  | Total Loss: 0.11 | Total Steps: 39
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 74/100  | Episode Reward: 0  | Average Reward 8.54  | Actor loss: 0.00 | Critic loss: 1.80 | Entropy loss: -0.0080  | Total Loss: 1.80 | Total Steps: 94
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 75/100  | Episode Reward: 10  | Average Reward 8.54  | Actor loss: 0.00 | Critic loss: 2.39 | Entropy loss: -0.0052  | Total Loss: 2.39 | Total Steps: 6
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in term

Training  | Episode: 1603/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1604/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.01 | Critic loss: 0.88 | Entropy loss: -0.0001  | Total Loss: 0.88 | Total Steps: 31

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1605/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.58 | Entropy loss: -0.0000  | Total Loss: 0.58 | Total Steps: 31

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1606/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.03 | Critic loss: 0.50 | Entropy loss: -0.0005  | Total Loss: 0.53 | Total Steps: 42

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1636/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.29 | Entropy loss: -0.0000  | Total Loss: 0.29 | Total Steps: 31

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1637/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.22 | Entropy loss: -0.0001  | Total Loss: 0.22 | Total Steps: 36

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1638/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.05 | Critic loss: 0.51 | Entropy loss: -0.0018  | Total Loss: 0.56 | Total Steps: 42

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1639/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.34 | Entropy loss: -0.0001  | Total Loss: 0.34 | Total Steps: 31

---green prism---
Agent 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1669/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: -0.00 | Critic loss: 0.34 | Entropy loss: -0.0000  | Total Loss: 0.34 | Total Steps: 31

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1670/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: -0.02 | Critic loss: 0.14 | Entropy loss: -0.0011  | Total Loss: 0.12 | Total Steps: 37

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1671/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: 0.00 | Critic loss: 0.04 | Entropy loss: -0.0000  | Total Loss: 0.04 | Total Steps: 36

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1672/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: 0.01 | Critic loss: 0.41 | Entropy loss: -0.0008  | Total Loss: 0.42 | Total Steps: 41

---yellow sphere--

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 1/100  | Episode Reward: 8  | Average Reward 8.60  | Actor loss: 0.00 | Critic loss: 0.17 | Entropy loss: -0.0153  | Total Loss: 0.15 | Total Steps: 44
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 2/100  | Episode Reward: 10  | Average Reward 8.60  | Actor loss: 0.00 | Critic loss: 0.02 | Entropy loss: -0.0026  | Total Loss: 0.02 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 3/100  | Episode Reward: 10  | Average Reward 8.72  | Actor loss: 0.00 | Critic loss: 0.25 | Entropy loss: -0.0012  | Total Loss: 0.25 | Total Steps: 31
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 4/100  | Episode Reward: 5  | Average R

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 5  | Average Reward 8.34  | Actor loss: 0.03 | Critic loss: 6.11 | Entropy loss: -0.0022  | Total Loss: 6.13 | Total Steps: 58
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 30/100  | Episode Reward: 10  | Average Reward 8.34  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0004  | Total Loss: 0.00 | Total Steps: 6
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 31/100  | Episode Reward: 10  | Average Reward 8.34  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0160  | Total Loss: -0.01 | Total Steps: 37
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 32/100  | Episode Reward: 10  | Average Reward 8.34  | Actor loss: 0

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 59/100  | Episode Reward: 10  | Average Reward 8.02  | Actor loss: 0.01 | Critic loss: 1.07 | Entropy loss: -0.0042  | Total Loss: 1.07 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 60/100  | Episode Reward: 10  | Average Reward 8.02  | Actor loss: 0.00 | Critic loss: 0.04 | Entropy loss: -0.0005  | Total Loss: 0.04 | Total Steps: 31
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 61/100  | Episode Reward: 10  | Average Reward 8.02  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0002  | Total Loss: 0.00 | Total Steps: 31
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 62/100  | Episode Reward: 5  | Average Reward 7.99  | Actor loss:

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 90/100  | Episode Reward: 10  | Average Reward 8.31  | Actor loss: 0.00 | Critic loss: 0.87 | Entropy loss: -0.0016  | Total Loss: 0.87 | Total Steps: 31
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 91/100  | Episode Reward: 2  | Average Reward 8.29  | Actor loss: 0.00 | Critic loss: 0.06 | Entropy loss: -0.0042  | Total Loss: 0.05 | Total Steps: 51
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 92/100  | Episode Reward: 10  | Average Reward 8.34  | Actor loss: 0.04 | Critic loss: 4.24 | Entropy loss: -0.0013  | Total Loss: 4.29 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 93/100  | Episode Reward: 10  | A

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1723/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: 0.00 | Critic loss: 0.44 | Entropy loss: -0.0000  | Total Loss: 0.44 | Total Steps: 31

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1724/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: 0.01 | Critic loss: 0.11 | Entropy loss: -0.0011  | Total Loss: 0.12 | Total Steps: 37

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1725/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: 0.03 | Critic loss: 1.03 | Entropy loss: -0.0012  | Total Loss: 1.05 | Total Steps: 30

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1726/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: -0.00 | Critic loss: 0.33 | Entropy loss: -0.0001  | Total Loss: 0.33 | Total Steps: 41

---black capsul

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1756/68000  | Episode Reward: 10  | Average Reward 9.74  | Actor loss: 0.01 | Critic loss: 0.32 | Entropy loss: -0.0002  | Total Loss: 0.32 | Total Steps: 36

---black cube---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1757/68000  | Episode Reward: 8  | Average Reward 9.71  | Actor loss: -0.23 | Critic loss: 1.46 | Entropy loss: -0.0031  | Total Loss: 1.23 | Total Steps: 42

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1758/68000  | Episode Reward: 10  | Average Reward 9.71  | Actor loss: 0.00 | Critic loss: 0.11 | Entropy loss: -0.0001  | Total Loss: 0.11 | Total Steps: 36

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1759/68000  | Episode Reward: 10  | Average Reward 9.71  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1789/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.10 | Critic loss: 0.24 | Entropy loss: -0.0021  | Total Loss: 0.34 | Total Steps: 39

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1790/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.00 | Critic loss: 0.35 | Entropy loss: -0.0000  | Total Loss: 0.35 | Total Steps: 31

---red prism---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1791/68000  | Episode Reward: 8  | Average Reward 9.75  | Actor loss: -0.27 | Critic loss: 2.97 | Entropy loss: -0.0031  | Total Loss: 2.70 | Total Steps: 43

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1792/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: 0.00 | Critic loss: 0.26 | Entropy loss: -0.0002  | Total Loss: 0.26 | Total Steps: 41



TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 19/100  | Episode Reward: 5  | Average Reward 8.54  | Actor loss: 0.04 | Critic loss: 3.37 | Entropy loss: -0.0033  | Total Loss: 3.41 | Total Steps: 87
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 20/100  | Episode Reward: 10  | Average Reward 8.54  | Actor loss: 0.00 | Critic loss: 0.09 | Entropy loss: -0.0026  | Total Loss: 0.09 | Total Steps: 37
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 21/100  | Episode Reward: 10  | Average Reward 8.54  | Actor loss: 0.01 | Critic loss: 0.14 | Entropy loss: -0.0006  | Total Loss: 0.16 | Total Steps: 36
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 22/100  | Episode Reward: 8  | Ave

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 45/100  | Episode Reward: 10  | Average Reward 7.95  | Actor loss: 0.02 | Critic loss: 0.33 | Entropy loss: -0.0006  | Total Loss: 0.35 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 46/100  | Episode Reward: 10  | Average Reward 7.95  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0002  | Total Loss: 0.00 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 47/100  | Episode Reward: 8  | Average Reward 7.92  | Actor loss: 0.05 | Critic loss: 1.72 | Entropy loss: -0.0135  | Total Loss: 1.76 | Total Steps: 39
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 48/100  | Episode Reward: 8  | Average Reward 7.92  | Actor loss: 0.06

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 74/100  | Episode Reward: 10  | Average Reward 7.97  | Actor loss: 0.00 | Critic loss: 0.27 | Entropy loss: -0.0024  | Total Loss: 0.27 | Total Steps: 31
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 75/100  | Episode Reward: 10  | Average Reward 7.97  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0005  | Total Loss: 0.00 | Total Steps: 6
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 76/100  | Episode Reward: 5  | Average Reward 7.92  | Actor loss: 0.02 | Critic loss: 0.47 | Entropy loss: -0.0132  | Total Loss: 0.48 | Total Steps: 72
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 77/100  | Episode Reward: 10  | Average Reward 7.92  | Actor loss: 0.0

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1804/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: -0.01 | Critic loss: 0.04 | Entropy loss: -0.0013  | Total Loss: 0.03 | Total Steps: 37

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1805/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0008  | Total Loss: 0.08 | Total Steps: 37

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1806/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.00 | Critic loss: 0.58 | Entropy loss: -0.0001  | Total Loss: 0.58 | Total Steps: 31

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1807/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow capsul

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1838/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: 0.19 | Critic loss: 0.36 | Entropy loss: -0.0023  | Total Loss: 0.55 | Total Steps: 40

---blue cube---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1839/68000  | Episode Reward: 8  | Average Reward 9.72  | Actor loss: -0.19 | Critic loss: 3.11 | Entropy loss: -0.0010  | Total Loss: 2.92 | Total Steps: 47

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1840/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.05 | Critic loss: 0.39 | Entropy loss: -0.0006  | Total Loss: 0.44 | Total Steps: 39

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1841/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1872/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.46 | Critic loss: 5.37 | Entropy loss: -0.0027  | Total Loss: 4.91 | Total Steps: 64

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1873/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.01 | Critic loss: 0.23 | Entropy loss: -0.0001  | Total Loss: 0.24 | Total Steps: 37

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1874/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.06 | Critic loss: 0.67 | Entropy loss: -0.0004  | Total Loss: 0.73 | Total Steps: 36

---red sphere---
Decision Step reward: -2.5
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1875/68000  | Episode Reward: 5  | Average Reward 9.65  | Actor loss: -0.18 | Critic loss: 8.89 | Entropy loss: -0.0026  | To

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 2/100  | Episode Reward: 8  | Average Reward 7.70  | Actor loss: 0.00 | Critic loss: 0.39 | Entropy loss: -0.0092  | Total Loss: 0.38 | Total Steps: 41
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 3/100  | Episode Reward: 10  | Average Reward 7.70  | Actor loss: 0.00 | Critic loss: 2.38 | Entropy loss: -0.0100  | Total Loss: 2.37 | Total Steps: 36
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 4/100  | Episode Reward: 10  | Average Reward 7.70  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0017  | Total Loss: 0.02 | Total Steps: 6
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 5/100  | Episode Reward: 10  | Average Reward 7.70  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy los

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 27/100  | Episode Reward: 2  | Average Reward 6.88  | Actor loss: 0.00 | Critic loss: 0.68 | Entropy loss: -0.0122  | Total Loss: 0.67 | Total Steps: 112
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 28/100  | Episode Reward: 8  | Average Reward 6.97  | Actor loss: 0.03 | Critic loss: 5.20 | Entropy loss: -0.0050  | Total Loss: 5.23 | Total Steps: 143
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 10  | Average Reward 7.00  | Actor loss: 0.00 | Critic loss: 1.64 | Entropy loss: -0.0017  | Total Loss: 1.64 | Total Steps: 6
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step re

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: 10  | Average Reward 7.47  | Actor loss: 0.08 | Critic loss: 2.41 | Entropy loss: -0.0098  | Total Loss: 2.48 | Total Steps: 39
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 57/100  | Episode Reward: 10  | Average Reward 7.47  | Actor loss: 0.01 | Critic loss: 3.41 | Entropy loss: -0.0010  | Total Loss: 3.42 | Total Steps: 36
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 58/100  | Episode Reward: 5  | Average Reward 7.47  | Actor loss: 2.70 | Critic loss: 13.41 | Entropy loss: -0.0171  | Total Loss: 16.08 | Total Steps: 79
TEST: ---yellow prism---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episo

TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 85/100  | Episode Reward: 10  | Average Reward 7.35  | Actor loss: 1.19 | Critic loss: 10.78 | Entropy loss: -0.0071  | Total Loss: 11.96 | Total Steps: 183
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 86/100  | Episode Reward: 10  | Average Reward 7.35  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0012  | Total Loss: 0.01 | Total Steps: 6
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 87/100  | Episode Reward: 10  | Average Reward 7.38  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0002  | Total Loss: 0.01 | Total Steps: 6
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 88/100  | Episode Reward: 8  | Average Reward 7.38  | Actor loss: 0.00 | Critic l

Training  | Episode: 1917/68000  | Episode Reward: 10  | Average Reward 9.05  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1918/68000  | Episode Reward: 10  | Average Reward 9.05  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1919/68000  | Episode Reward: 10  | Average Reward 9.05  | Actor loss: 0.03 | Critic loss: 0.31 | Entropy loss: -0.0019  | Total Loss: 0.34 | Total Steps: 42

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1920/68000  | Episode Reward: 10  | Average Reward 9.07  | Actor loss: 0.01 | Critic loss: 0.24 | Entropy loss: -0.0002  | Total Loss: 0.26 | Total Steps: 37

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1950/68000  | Episode Reward: 10  | Average Reward 9.07  | Actor loss: 0.00 | Critic loss: 1.03 | Entropy loss: -0.0000  | Total Loss: 1.04 | Total Steps: 31

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1951/68000  | Episode Reward: 10  | Average Reward 9.07  | Actor loss: 0.00 | Critic loss: 0.29 | Entropy loss: -0.0001  | Total Loss: 0.29 | Total Steps: 37

---red cylinder---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1952/68000  | Episode Reward: 8  | Average Reward 9.05  | Actor loss: -0.09 | Critic loss: 0.70 | Entropy loss: -0.0011  | Total Loss: 0.62 | Total Steps: 37

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1953/68000  | Episode Reward: 10  | Average Reward 9.05  | Actor loss: 0.02 | Critic loss: 0.17 | Entropy loss: -0.0003  | Total Loss: 0.19 | Total Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1983/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.01 | Critic loss: 0.19 | Entropy loss: -0.0002  | Total Loss: 0.20 | Total Steps: 37

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1984/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1985/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.06 | Critic loss: 0.09 | Entropy loss: -0.0033  | Total Loss: 0.03 | Total Steps: 41

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1986/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.01 | Critic loss: 0.13 | Entropy loss: -0.0001  | Total Loss: 0.14 | Total Steps: 36

---green prism--

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 15/100  | Episode Reward: 10  | Average Reward 7.75  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0001  | Total Loss: 0.00 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 16/100  | Episode Reward: 10  | Average Reward 7.75  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0001  | Total Loss: 0.05 | Total Steps: 6
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 17/100  | Episode Reward: 10  | Average Reward 7.75  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0001  | Total Loss: 0.00 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 10  | Average Reward 7.75  | Actor loss: 0.00 | Critic loss: 0.06 | Entropy loss: -0.0012  | Total Loss: 

TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 46/100  | Episode Reward: 10  | Average Reward 8.53  | Actor loss: 0.01 | Critic loss: 0.24 | Entropy loss: -0.0017  | Total Loss: 0.24 | Total Steps: 156
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 47/100  | Episode Reward: 5  | Average Reward 8.72  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0094  | Total Loss: 0.08 | Total Steps: 47
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 48/100  | Episode Reward: 5  | Average Reward 8.68  | Actor loss: 0.00 | Critic loss: 0.24 | Entropy loss: -0.0019  | Total Loss: 0.24 | Total Steps: 42
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 77/100  | Episode Reward: 8  | Average Reward 8.72  | Actor loss: 0.00 | Critic loss: 0.29 | Entropy loss: -0.0013  | Total Loss: 0.29 | Total Steps: 38
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 78/100  | Episode Reward: 10  | Average Reward 8.72  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0036  | Total Loss: -0.00 | Total Steps: 66
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 79/100  | Episode Reward: 10  | Average Reward 8.95  | Actor loss: 0.00 | Critic loss: 0.09 | Entropy loss: -0.0005  | Total Loss: 0.10 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 10  | Average Reward 8.95  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy

Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2010/68000  | Episode Reward: 8  | Average Reward 9.68  | Actor loss: -0.23 | Critic loss: 3.60 | Entropy loss: -0.0017  | Total Loss: 3.36 | Total Steps: 75

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2011/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.53 | Entropy loss: -0.0000  | Total Loss: 0.53 | Total Steps: 31

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2012/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.79 | Entropy loss: -0.0001  | Total Loss: 0.80 | Total Steps: 31

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2013/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.00 | Critic loss: 0.75 | Entropy loss: -0.0000  | Total Loss: 0.75 | Total Steps:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2043/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.00 | Critic loss: 0.09 | Entropy loss: -0.0001  | Total Loss: 0.09 | Total Steps: 36

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2044/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: -0.01 | Critic loss: 0.38 | Entropy loss: -0.0003  | Total Loss: 0.37 | Total Steps: 41

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2045/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow capsule---
Decision Step reward: -2.5
Decision Step reward: -2.5
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2046/68000  | Episode Reward: 2  | Average Reward 9.70  | Actor loss: -0.29 | Critic loss:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2076/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.00 | Critic loss: 0.63 | Entropy loss: -0.0000  | Total Loss: 0.63 | Total Steps: 31

---blue capsule---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2077/68000  | Episode Reward: 8  | Average Reward 9.70  | Actor loss: -0.05 | Critic loss: 3.94 | Entropy loss: -0.0002  | Total Loss: 3.90 | Total Steps: 47

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2078/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0000  | Total Loss: 0.05 | Total Steps: 6

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2079/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.01 | Critic loss: 0.42 | Entropy loss: -0.0028  | Total Loss: 0.43 | Total Steps:

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 8/100  | Episode Reward: 10  | Average Reward 9.12  | Actor loss: 2.88 | Critic loss: 1.31 | Entropy loss: -0.0075  | Total Loss: 4.18 | Total Steps: 36
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 9/100  | Episode Reward: 10  | Average Reward 9.12  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0014  | Total Loss: -0.00 | Total Steps: 6
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 10/100  | Episode Reward: 0  | Average Reward 9.03  | Actor loss: 0.00 | Critic loss: 2.32 | Entropy loss: -0.0181  | Total Loss: 2.31 | Total Steps: 82
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 11/

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 38/100  | Episode Reward: 10  | Average Reward 9.22  | Actor loss: 0.03 | Critic loss: 1.60 | Entropy loss: -0.0032  | Total Loss: 1.62 | Total Steps: 96
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 39/100  | Episode Reward: 10  | Average Reward 9.25  | Actor loss: 0.00 | Critic loss: 0.63 | Entropy loss: -0.0012  | Total Loss: 0.63 | Total Steps: 41
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode Reward: 10  | Average Reward 9.25  | Actor loss: 0.00 | Critic loss: 0.13 | Entropy loss: -0.0003  | Total Loss: 0.13 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 10  | Average Reward 9.25  | Actor loss: 0.01 | Critic loss: 2.10 | Entropy loss: -0.0021  | Total Loss: 

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Step: 200
TEST: Step: 300
TEST: Step: 400
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 69/100  | Episode Reward: -15  | Average Reward 9.22  | Actor loss: -0.01 | Critic loss: 96.15 | Entropy loss: -0.0039  | Total Loss: 96.13 | Total Steps: 500
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 10  | Average Reward 9.22  | Actor loss: 0.02 | Critic loss: 0.35 | Entropy loss: -0.0005  | Total Loss: 0.37 | Total Steps: 36
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 10  | Average Reward 9.22  | Actor loss: 0.00 | Critic loss: 0.17 | Entropy loss: -0.0002  | Total Loss: 0.17 | Total Steps: 6
TEST: ---yellow prism---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Test

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 8  | Average Reward 8.88  | Actor loss: 0.08 | Critic loss: 4.40 | Entropy loss: -0.0116  | Total Loss: 4.47 | Total Steps: 42
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 10  | Average Reward 8.88  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0012  | Total Loss: 0.08 | Total Steps: 31
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 100/100  | Episode Reward: 10  | Average Reward 8.90  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0003  | Total Loss: 0.00 | Total Steps: 6

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2102/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: 0.06 | Critic loss: 0.76 | Entropy loss: -0.0010  | Total Loss: 0.81 | Total Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2132/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: 0.12 | Critic loss: 0.77 | Entropy loss: -0.0014  | Total Loss: 0.89 | Total Steps: 42

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2133/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: 0.01 | Critic loss: 0.17 | Entropy loss: -0.0006  | Total Loss: 0.18 | Total Steps: 41

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2134/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: -0.02 | Critic loss: 2.28 | Entropy loss: -0.0002  | Total Loss: 2.26 | Total Steps: 49

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2135/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: 0.00 | Critic loss: 0.18 | Entropy loss: -0.0001  | Total Loss: 0.18 | Total Steps: 36

---yellow cube---
De

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2165/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.00 | Critic loss: 0.51 | Entropy loss: -0.0000  | Total Loss: 0.51 | Total Steps: 31

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2166/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2167/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2168/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---black p

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2198/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.00 | Critic loss: 0.38 | Entropy loss: -0.0000  | Total Loss: 0.38 | Total Steps: 31

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2199/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.07 | Critic loss: 1.39 | Entropy loss: -0.0007  | Total Loss: 1.31 | Total Steps: 47

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2200/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.00 | Critic loss: 0.41 | Entropy loss: -0.0000  | Total Loss: 0.41 | Total Steps: 31

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2201/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: 0.02 | Critic loss: 0.11 | Entropy loss: -0.0022  | Total Loss: 0.13 | Total Steps: 36
TEST: ---green

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 28/100  | Episode Reward: 10  | Average Reward 8.85  | Actor loss: 0.01 | Critic loss: 0.24 | Entropy loss: -0.0008  | Total Loss: 0.24 | Total Steps: 36
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 10  | Average Reward 8.88  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0002  | Total Loss: 0.00 | Total Steps: 6
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 30/100  | Episode Reward: 8  | Average Reward 8.85  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0018  | Total Loss: 0.01 | Total Steps: 29
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 31/100  | Episode Reward: 10  | Average Reward 8.85  | Actor loss: 0.00 | Critic loss: 1.44 | Entropy 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 58/100  | Episode Reward: 10  | Average Reward 8.72  | Actor loss: 0.16 | Critic loss: 3.74 | Entropy loss: -0.0059  | Total Loss: 3.89 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 59/100  | Episode Reward: 10  | Average Reward 8.72  | Actor loss: 0.00 | Critic loss: 0.74 | Entropy loss: -0.0041  | Total Loss: 0.73 | Total Steps: 66
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 60/100  | Episode Reward: 5  | Average Reward 8.68  | Actor loss: 0.00 | Critic loss: 0.13 | Entropy loss: -0.0145  | Total Loss: 0.12 | Total Steps: 47
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step rew

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 89/100  | Episode Reward: 10  | Average Reward 8.93  | Actor loss: 0.00 | Critic loss: 0.55 | Entropy loss: -0.0002  | Total Loss: 0.55 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 90/100  | Episode Reward: 10  | Average Reward 8.95  | Actor loss: 0.00 | Critic loss: 0.04 | Entropy loss: -0.0008  | Total Loss: 0.04 | Total Steps: 36
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 91/100  | Episode Reward: 10  | Average Reward 8.95  | Actor loss: 0.00 | Critic loss: 1.57 | Entropy loss: -0.0021  | Total Loss: 1.57 | Total Steps: 6
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 92/100  | Episode Reward: 8  | Average Reward 8.93  | Actor loss: 0.00 | Critic loss: 0.49 | Entropy


---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2222/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2223/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.00 | Critic loss: 0.10 | Entropy loss: -0.0001  | Total Loss: 0.10 | Total Steps: 36

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2224/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.01 | Critic loss: 0.31 | Entropy loss: -0.0002  | Total Loss: 0.31 | Total Steps: 37

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2225/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.08 | Critic loss: 0.24 | Entropy loss: -0.0023  | Total Loss: 0.16 | Total Steps: 41

---bl

Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2256/68000  | Episode Reward: 8  | Average Reward 9.70  | Actor loss: -0.16 | Critic loss: 1.76 | Entropy loss: -0.0010  | Total Loss: 1.59 | Total Steps: 41

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2257/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.88 | Critic loss: 4.69 | Entropy loss: -0.0067  | Total Loss: 3.80 | Total Steps: 72

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2258/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.00 | Critic loss: 0.31 | Entropy loss: -0.0001  | Total Loss: 0.31 | Total Steps: 31

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2259/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.00 | Critic loss: 0.61 | Entropy loss: -0.0001  | Total Loss: 0.61 | Total Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2289/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0000  | Total Loss: 0.08 | Total Steps: 36

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2290/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2291/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: 0.02 | Critic loss: 0.26 | Entropy loss: -0.0002  | Total Loss: 0.28 | Total Steps: 36

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2292/68000  | Episode Reward: 10  | Average Reward 9.75  | Actor loss: 0.00 | Critic loss: 0.10 | Entropy loss: -0.0001  | Total Loss: 0.10 | Total Steps: 41

---red cylinder---
Agent

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 19/100  | Episode Reward: 10  | Average Reward 8.72  | Actor loss: 0.00 | Critic loss: 0.80 | Entropy loss: -0.0106  | Total Loss: 0.79 | Total Steps: 32
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 20/100  | Episode Reward: 5  | Average Reward 8.68  | Actor loss: 0.00 | Critic loss: 0.38 | Entropy loss: -0.0056  | Total Loss: 0.37 | Total Steps: 64
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 21/100  | Episode Reward: 8  | Average Reward 8.65  | Actor loss: 0.08 | Critic loss: 3.12 | Entropy loss: -0.0029  | Total Loss: 3.20 | Total Steps: 38
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 22/100  | Episode Reward: 10  | A

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 48/100  | Episode Reward: 2  | Average Reward 8.32  | Actor loss: 0.00 | Critic loss: 0.14 | Entropy loss: -0.0008  | Total Loss: 0.15 | Total Steps: 52
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 49/100  | Episode Reward: 10  | Average Reward 8.38  | Actor loss: 0.15 | Critic loss: 6.69 | Entropy loss: -0.0117  | Total Loss: 6.83 | Total Steps: 37
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 50/100  | Episode Reward: 8  | Average Reward 8.40  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0091  | Total Loss: -0.01 | Total Steps: 47
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode:

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 77/100  | Episode Reward: 10  | Average Reward 8.38  | Actor loss: 2.76 | Critic loss: 11.67 | Entropy loss: -0.0223  | Total Loss: 14.41 | Total Steps: 73
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 78/100  | Episode Reward: 10  | Average Reward 8.38  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0016  | Total Loss: 0.07 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 79/100  | Episode Reward: 10  | Average Reward 8.45  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0003  | Total Loss: -0.00 | Total Steps: 6
TEST: ---yellow prism---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 8  | Average Reward 8.43  | Actor loss: 0.15 | Critic loss: 0.32 | Entr

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2308/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.53 | Entropy loss: -0.0000  | Total Loss: 0.53 | Total Steps: 31

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2309/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.13 | Entropy loss: -0.0001  | Total Loss: 0.13 | Total Steps: 36

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2310/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2311/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.01 | Critic loss: 0.08 | Entropy loss: -0.0002  | Total Loss: 0.09 | Total Steps: 37

---blue cube---
A

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2342/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.01 | Critic loss: 0.19 | Entropy loss: -0.0004  | Total Loss: 0.20 | Total Steps: 41

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2343/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: 0.00 | Critic loss: 0.68 | Entropy loss: -0.0001  | Total Loss: 0.68 | Total Steps: 31

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2344/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: -0.07 | Critic loss: 0.47 | Entropy loss: -0.0008  | Total Loss: 0.40 | Total Steps: 36

---red sphere---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2345/68000  | Episode Reward: 8  | Average Reward 9.80  | Actor loss: -0.40 | Critic loss: 4.88 | Entropy loss: -0.0020  | Total Loss: 4.48 | Total Steps: 4

Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2374/68000  | Episode Reward: 8  | Average Reward 9.62  | Actor loss: -0.08 | Critic loss: 0.44 | Entropy loss: -0.0013  | Total Loss: 0.36 | Total Steps: 37

---green prism---
Decision Step reward: -1.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2375/68000  | Episode Reward: 9  | Average Reward 9.62  | Actor loss: 0.29 | Critic loss: 1.98 | Entropy loss: -0.0046  | Total Loss: 2.26 | Total Steps: 32

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2376/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.08 | Critic loss: 0.32 | Entropy loss: -0.0009  | Total Loss: 0.40 | Total Steps: 36

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2377/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 5/100  | Episode Reward: 10  | Average Reward 8.25  | Actor loss: 0.00 | Critic loss: 0.14 | Entropy loss: -0.0058  | Total Loss: 0.13 | Total Steps: 39
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 6/100  | Episode Reward: 10  | Average Reward 8.25  | Actor loss: 0.23 | Critic loss: 0.36 | Entropy loss: -0.0018  | Total Loss: 0.59 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 7/100  | Episode Reward: 10  | Average Reward 8.30  | Actor loss: 0.00 | Critic loss: 0.15 | Entropy loss: -0.0009  | Total Loss: 0.15 | Total Steps: 31
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 8/100  | Episode Reward: 10  | Average Reward 8.32  | Actor loss: 1.07 | Critic loss: 7.80 | Entropy loss: -0.0070  | Total Loss: 8.86 

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 36/100  | Episode Reward: 5  | Average Reward 8.57  | Actor loss: 0.04 | Critic loss: 1.46 | Entropy loss: -0.0088  | Total Loss: 1.49 | Total Steps: 69
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 37/100  | Episode Reward: 10  | Average Reward 8.72  | Actor loss: 5.14 | Critic loss: 12.20 | Entropy loss: -0.0113  | Total Loss: 17.32 | Total Steps: 53
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 38/100  | Episode Reward: 5  | Average Reward 8.68  | Actor loss: 0.00 | Critic loss: 0.13 | Entropy loss: -0.0071  | Total Loss: 0.12 | Total Steps: 43
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 3

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 64/100  | Episode Reward: 5  | Average Reward 8.38  | Actor loss: 0.49 | Critic loss: 2.45 | Entropy loss: -0.0111  | Total Loss: 2.94 | Total Steps: 56
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 65/100  | Episode Reward: 10  | Average Reward 8.40  | Actor loss: 0.23 | Critic loss: 0.35 | Entropy loss: -0.0019  | Total Loss: 0.58 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 66/100  | Episode Reward: 10  | Average Reward 8.40  | Actor loss: 0.00 | Critic loss: 0.73 | Entropy loss: -0.0057  | Total Loss: 0.72 | Total Steps: 30
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 67/100  | Episode Reward: 10  | Average Reward 8.40  | Actor loss: 0.00 | Critic loss: 0.13 | Entropy

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 93/100  | Episode Reward: 10  | Average Reward 8.38  | Actor loss: 0.00 | Critic loss: 0.94 | Entropy loss: -0.0024  | Total Loss: 0.94 | Total Steps: 31
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 94/100  | Episode Reward: -10  | Average Reward 8.18  | Actor loss: 0.21 | Critic loss: 5.24 | Entropy loss: -0.0138  | Total Loss: 5.43 | Total Steps: 141
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 10  | Average Reward 8.18  | Actor loss: 0.00 | Critic loss: 0.12 | Entropy loss: -0

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2425/68000  | Episode Reward: 10  | Average Reward 9.54  | Actor loss: -0.20 | Critic loss: 3.52 | Entropy loss: -0.0011  | Total Loss: 3.32 | Total Steps: 55

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2426/68000  | Episode Reward: 10  | Average Reward 9.54  | Actor loss: 0.17 | Critic loss: 1.91 | Entropy loss: -0.0013  | Total Loss: 2.08 | Total Steps: 30

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2427/68000  | Episode Reward: 10  | Average Reward 9.54  | Actor loss: 0.06 | Critic loss: 0.86 | Entropy loss: -0.0005  | Total Loss: 0.92 | Total Steps: 31

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2428/68000  | Episode Reward: 10  | Average Reward 9.54  | Actor loss: 0.00 | Critic loss: 0.15 | Entropy loss: -0.0001  | Total Loss: 0.15 | Total Steps: 36

---red prism---
Decision

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2458/68000  | Episode Reward: 10  | Average Reward 9.64  | Actor loss: -0.08 | Critic loss: 0.11 | Entropy loss: -0.0022  | Total Loss: 0.03 | Total Steps: 37

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2459/68000  | Episode Reward: 10  | Average Reward 9.64  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2460/68000  | Episode Reward: 10  | Average Reward 9.64  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2461/68000  | Episode Reward: 10  | Average Reward 9.64  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---red cylinder---
Ag

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2492/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.32 | Critic loss: 0.39 | Entropy loss: -0.0026  | Total Loss: 0.71 | Total Steps: 37

---red prism---
Decision Step reward: -1.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2493/68000  | Episode Reward: 9  | Average Reward 9.69  | Actor loss: -0.16 | Critic loss: 0.77 | Entropy loss: -0.0034  | Total Loss: 0.60 | Total Steps: 33

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2494/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: 0.00 | Critic loss: 0.35 | Entropy loss: -0.0001  | Total Loss: 0.36 | Total Steps: 31

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2495/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: -0.06 | Critic loss: 0.10 | Entropy loss: -0.0033  | Total Loss: 0.04 | Total Steps:

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 22/100  | Episode Reward: 8  | Average Reward 8.10  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0052  | Total Loss: 0.04 | Total Steps: 30
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 23/100  | Episode Reward: 10  | Average Reward 8.12  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0007  | Total Loss: 0.02 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 24/100  | Episode Reward: 10  | Average Reward 8.12  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0082  | Total Loss: 0.00 | Total Steps: 32
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 25/100  | Episode Reward: 10  | Average Reward 8.12  | Actor loss: 0.00 | Critic loss: 1.78 | Entropy loss: -0.0056  | Total Loss: 1.

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 52/100  | Episode Reward: 5  | Average Reward 8.57  | Actor loss: 0.00 | Critic loss: 1.03 | Entropy loss: -0.0047  | Total Loss: 1.03 | Total Steps: 57
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 53/100  | Episode Reward: 10  | Average Reward 8.57  | Actor loss: 0.00 | Critic loss: 0.19 | Entropy loss: -0.0003  | Total Loss: 0.19 | Total Steps: 31
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 54/100  | Episode Reward: 10  | Average Reward 8.57  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0004  | Total Loss: 0.01 | Total Steps: 31
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 55/100  | Episode Reward: 10  | Average Reward 8.57  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0008  | Total Loss: 

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 82/100  | Episode Reward: 8  | Average Reward 8.59  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0054  | Total Loss: -0.00 | Total Steps: 47
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 83/100  | Episode Reward: 10  | Average Reward 8.59  | Actor loss: 0.03 | Critic loss: 1.18 | Entropy loss: -0.0132  | Total Loss: 1.19 | Total Steps: 39
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 84/100  | Episode Reward: 8  | Average Reward 8.56  | Actor loss: 0.00 | Critic loss: 0.23 | Entropy loss: -0.0036  | Total Loss: 0.22 | Total Steps: 29
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 85/100  | Episode Reward: 10  | Average Reward 8.56  | Actor loss

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2513/68000  | Episode Reward: 10  | Average Reward 9.74  | Actor loss: 0.19 | Critic loss: 0.34 | Entropy loss: -0.0028  | Total Loss: 0.53 | Total Steps: 40

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2514/68000  | Episode Reward: 10  | Average Reward 9.74  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2515/68000  | Episode Reward: 10  | Average Reward 9.74  | Actor loss: -0.00 | Critic loss: 0.52 | Entropy loss: -0.0000  | Total Loss: 0.52 | Total Steps: 31

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2516/68000  | Episode Reward: 10  | Average Reward 9.74  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---black cylinder---
Agen

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2546/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: -0.00 | Critic loss: 0.32 | Entropy loss: -0.0003  | Total Loss: 0.32 | Total Steps: 41

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2547/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2548/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0000  | Total Loss: 0.02 | Total Steps: 6

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2549/68000  | Episode Reward: 10  | Average Reward 9.79  | Actor loss: -0.09 | Critic loss: 0.13 | Entropy loss: -0.0027  | Total Loss: 0.04 | Total Steps: 6

---yellow capsule---
De

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2579/68000  | Episode Reward: 10  | Average Reward 9.81  | Actor loss: -0.01 | Critic loss: 0.27 | Entropy loss: -0.0008  | Total Loss: 0.26 | Total Steps: 32

---black cube---
Decision Step reward: -2.5
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2580/68000  | Episode Reward: 5  | Average Reward 9.77  | Actor loss: -0.66 | Critic loss: 7.48 | Entropy loss: -0.0027  | Total Loss: 6.82 | Total Steps: 46

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2581/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: 0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2582/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0005  | T

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 9/100  | Episode Reward: 5  | Average Reward 8.59  | Actor loss: 0.00 | Critic loss: 0.28 | Entropy loss: -0.0185  | Total Loss: 0.26 | Total Steps: 75
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 10/100  | Episode Reward: 10  | Average Reward 8.59  | Actor loss: 0.00 | Critic loss: 1.00 | Entropy loss: -0.0039  | Total Loss: 1.00 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 11/100  | Episode Reward: 8  | Average Reward 8.56  | Actor loss: 0.03 | Critic loss: 5.79 | Entropy loss: -0.0083  | Total Loss: 5.81 | Total Steps: 86
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 1

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 38/100  | Episode Reward: 8  | Average Reward 8.31  | Actor loss: 0.00 | Critic loss: 0.06 | Entropy loss: -0.0175  | Total Loss: 0.04 | Total Steps: 31
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 39/100  | Episode Reward: 8  | Average Reward 8.29  | Actor loss: 0.05 | Critic loss: 4.48 | Entropy loss: -0.0151  | Total Loss: 4.51 | Total Steps: 46
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode Reward: 10  | Average Reward 8.34  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0026  | Total Loss: 0.01 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 10  | Average Reward 8.34  | Actor loss: 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 68/100  | Episode Reward: 10  | Average Reward 8.37  | Actor loss: 0.00 | Critic loss: 0.56 | Entropy loss: -0.0007  | Total Loss: 0.56 | Total Steps: 6
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 69/100  | Episode Reward: 10  | Average Reward 8.37  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0002  | Total Loss: 0.00 | Total Steps: 6
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 10  | Average Reward 8.37  | Actor loss: 0.00 | Critic loss: 2.14 | Entropy loss: -0.0016  | Total Loss: 2.14 | Total Steps: 6
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 8  | Average Reward 8.34  | Actor loss: 0.09 | Critic loss: 0.40 | Entropy los

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 10  | Average Reward 8.50  | Actor loss: 0.33 | Critic loss: 0.51 | Entropy loss: -0.0028  | Total Loss: 0.84 | Total Steps: 36
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 10  | Average Reward 8.55  | Actor loss: 0.00 | Critic loss: 0.06 | Entropy loss: -0.0216  | Total Loss: 0.04 | Total Steps: 34
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 100/100  | Episode Reward: 10  | Average Reward 8.55  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0036  | Total Loss: -0.00 | Total Steps: 6

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2602/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2632/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: 0.00 | Critic loss: 0.66 | Entropy loss: -0.0000  | Total Loss: 0.66 | Total Steps: 31

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2633/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: 0.00 | Critic loss: 0.77 | Entropy loss: -0.0000  | Total Loss: 0.77 | Total Steps: 31

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2634/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: 0.01 | Critic loss: 0.29 | Entropy loss: -0.0001  | Total Loss: 0.30 | Total Steps: 36

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2635/68000  | Episode Reward: 10  | Average Reward 9.69  | Actor loss: 0.01 | Critic loss: 0.09 | Entropy loss: -0.0008  | Total Loss: 0.10 | Total Steps: 37

---blue cube---
Agent i

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2665/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: -0.04 | Critic loss: 1.50 | Entropy loss: -0.0004  | Total Loss: 1.46 | Total Steps: 47

---yellow capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2666/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.01 | Critic loss: 0.24 | Entropy loss: -0.0002  | Total Loss: 0.25 | Total Steps: 41

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2667/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.10 | Critic loss: 0.28 | Entropy loss: -0.0029  | Total Loss: 0.38 | Total Steps: 38

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2668/68000  | Episode Reward: 10  | Average Reward 9.62  | Actor loss: 0.03 | Critic loss: 0.17 | Entropy loss: -0.0025  | Total Loss: 0.20 | Total Steps: 40

---red sphere---
Age

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2699/68000  | Episode Reward: 10  | Average Reward 9.74  | Actor loss: 0.00 | Critic loss: 0.11 | Entropy loss: -0.0001  | Total Loss: 0.11 | Total Steps: 36

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2700/68000  | Episode Reward: 10  | Average Reward 9.74  | Actor loss: 0.00 | Critic loss: 0.10 | Entropy loss: -0.0001  | Total Loss: 0.11 | Total Steps: 41

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2701/68000  | Episode Reward: 10  | Average Reward 9.74  | Actor loss: -0.00 | Critic loss: 0.25 | Entropy loss: -0.0002  | Total Loss: 0.25 | Total Steps: 41
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 1/100  | Episode Reward: 10  | Average Reward 8.60  | Actor loss: 0.00 | Critic loss: 0.12 | Entropy loss: -0.0004  | Total Loss: 0.13 | Total Steps: 36
TEST: ---b

TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 28/100  | Episode Reward: 10  | Average Reward 8.57  | Actor loss: 0.00 | Critic loss: 0.90 | Entropy loss: -0.0027  | Total Loss: 0.90 | Total Steps: 31
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 8  | Average Reward 8.60  | Actor loss: 0.12 | Critic loss: 1.81 | Entropy loss: -0.0223  | Total Loss: 1.91 | Total Steps: 53
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 30/100  | Episode Reward: 8  | Average Reward 8.57  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0033  | Total Loss: -0.00 | Total Steps: 46
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 31/100  |

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 58/100  | Episode Reward: 10  | Average Reward 8.78  | Actor loss: 0.01 | Critic loss: 0.14 | Entropy loss: -0.0015  | Total Loss: 0.14 | Total Steps: 41
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 59/100  | Episode Reward: 10  | Average Reward 8.78  | Actor loss: 0.01 | Critic loss: 0.08 | Entropy loss: -0.0041  | Total Loss: 0.09 | Total Steps: 41
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 60/100  | Episode Reward: 5  | Average Reward 8.82  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0042  | Total Loss: 0.06 | Total Steps: 66
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 61/100  | Episode Reward: 10  | Average Reward 8.88  | Actor loss:

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 89/100  | Episode Reward: 10  | Average Reward 9.03  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0001  | Total Loss: 0.01 | Total Steps: 6
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 90/100  | Episode Reward: 8  | Average Reward 9.03  | Actor loss: 0.00 | Critic loss: 0.16 | Entropy loss: -0.0061  | Total Loss: 0.16 | Total Steps: 90
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 91/100  | Episode Reward: 10  | Average Reward 9.03  | Actor loss: 0.00 | Critic loss: 0.34 | Entropy loss: -0.0035  | Total Loss: 0.34 | Total Steps: 31
TEST: ---yellow prism---
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 92/100  | Episode Reward: 9  | Average Reward 9.04  | Actor loss: 

Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2722/68000  | Episode Reward: 8  | Average Reward 9.80  | Actor loss: -0.18 | Critic loss: 1.06 | Entropy loss: -0.0011  | Total Loss: 0.88 | Total Steps: 37

---green sphere---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2723/68000  | Episode Reward: 8  | Average Reward 9.78  | Actor loss: -0.03 | Critic loss: 3.57 | Entropy loss: -0.0001  | Total Loss: 3.54 | Total Steps: 47

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2724/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: -0.04 | Critic loss: 0.06 | Entropy loss: -0.0010  | Total Loss: 0.01 | Total Steps: 37

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2725/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.02 | Critic loss: 0.41 | Entropy loss: -0.0004  |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2755/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: -0.00 | Critic loss: 0.46 | Entropy loss: -0.0000  | Total Loss: 0.46 | Total Steps: 31

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2756/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: -0.03 | Critic loss: 0.28 | Entropy loss: -0.0019  | Total Loss: 0.25 | Total Steps: 41

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2757/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: -0.04 | Critic loss: 0.75 | Entropy loss: -0.0009  | Total Loss: 0.71 | Total Steps: 47

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2758/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.00 | Critic loss: 0.55 | Entropy loss: -0.0001  | Total Loss: 0.55 | Total Steps: 31

---green sphere--

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2788/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: -0.03 | Critic loss: 0.36 | Entropy loss: -0.0008  | Total Loss: 0.33 | Total Steps: 32

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2789/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: -0.00 | Critic loss: 0.54 | Entropy loss: -0.0000  | Total Loss: 0.54 | Total Steps: 31

---black prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2790/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: 0.00 | Critic loss: 0.10 | Entropy loss: -0.0001  | Total Loss: 0.10 | Total Steps: 37

---green sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2791/68000  | Episode Reward: 10  | Average Reward 9.88  | Actor loss: -0.00 | Critic loss: 0.34 | Entropy loss: -0.0000  | Total Loss: 0.33 | Total Steps: 31

---blue cube---
Dec

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 19/100  | Episode Reward: 10  | Average Reward 9.04  | Actor loss: 0.06 | Critic loss: 0.93 | Entropy loss: -0.0033  | Total Loss: 0.99 | Total Steps: 40
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 20/100  | Episode Reward: 10  | Average Reward 9.04  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0014  | Total Loss: -0.00 | Total Steps: 41
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 21/100  | Episode Reward: 10  | Average Reward 9.04  | Actor loss: 0.00 | Critic loss: 1.56 | Entropy loss: -0.0025  | Total Loss: 1.56 | Total Steps: 30
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 22/100  | Episode Reward: 8  | Average Reward 9.04  | Actor loss: 0.00 | Critic loss: 0.32 | Entro

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 49/100  | Episode Reward: 5  | Average Reward 8.84  | Actor loss: 0.00 | Critic loss: 0.61 | Entropy loss: -0.0120  | Total Loss: 0.60 | Total Steps: 47
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 50/100  | Episode Reward: 10  | Average Reward 8.84  | Actor loss: 0.13 | Critic loss: 4.05 | Entropy loss: -0.0040  | Total Loss: 4.18 | Total Steps: 41
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 51/100  | Episode Reward: 8  | Average Reward 8.81  | Actor loss: 0.03 | Critic loss: 5.31 | Entropy loss: -0.0056  | Total Loss: 5.33 | Total Steps: 43
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 52/100  | Episode Reward: 10  | Average Reward 8.84  | Actor loss: 1

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 79/100  | Episode Reward: 10  | Average Reward 8.64  | Actor loss: 0.00 | Critic loss: 0.24 | Entropy loss: -0.0006  | Total Loss: 0.24 | Total Steps: 31
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 10  | Average Reward 8.66  | Actor loss: 0.00 | Critic loss: 2.08 | Entropy loss: -0.0009  | Total Loss: 2.09 | Total Steps: 6
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 81/100  | Episode Reward: 10  | Average Reward 8.66  | Actor loss: 0.00 | Critic loss: 0.79 | Entropy loss: -0.0014  | Total Loss: 0.79 | Total Steps: 36
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 82/100  | Episode Reward: 10  | Average Reward 8.69  | Actor loss: 0.00 | Critic loss: 0.24 | Entropy loss: -0.0006  | Total Loss: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2811/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.00 | Critic loss: 0.58 | Entropy loss: -0.0000  | Total Loss: 0.58 | Total Steps: 31

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2812/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.08 | Critic loss: 1.14 | Entropy loss: -0.0007  | Total Loss: 1.22 | Total Steps: 33

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2813/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: 0.05 | Critic loss: 0.42 | Entropy loss: -0.0005  | Total Loss: 0.47 | Total Steps: 41

---yellow cube---
Decision Step reward: -2.5
Decision Step reward: -2.5
Decision Step reward: -2.5
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2814/68000  | Episode Reward: 0  | Average Reward 9.68  | Actor loss: -

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2844/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.00 | Critic loss: 0.26 | Entropy loss: -0.0001  | Total Loss: 0.26 | Total Steps: 36

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2845/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.03 | Critic loss: 0.27 | Entropy loss: -0.0004  | Total Loss: 0.30 | Total Steps: 41

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2846/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.05 | Critic loss: 0.26 | Entropy loss: -0.0012  | Total Loss: 0.22 | Total Steps: 32

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2847/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.00 | Critic loss: 0.50 | Entropy loss: -0.0000  | Total Loss: 0.51 | Total Steps: 31

---yellow cylinder

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2876/68000  | Episode Reward: 10  | Average Reward 9.56  | Actor loss: 0.00 | Critic loss: 0.87 | Entropy loss: -0.0000  | Total Loss: 0.87 | Total Steps: 31

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2877/68000  | Episode Reward: 10  | Average Reward 9.56  | Actor loss: 0.01 | Critic loss: 0.27 | Entropy loss: -0.0006  | Total Loss: 0.28 | Total Steps: 41

---red cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2878/68000  | Episode Reward: 10  | Average Reward 9.56  | Actor loss: -0.07 | Critic loss: 0.38 | Entropy loss: -0.0015  | Total Loss: 0.31 | Total Steps: 38

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2879/68000  | Episode Reward: 10  | Average Reward 9.56  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---yellow cube---
A

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 8/100  | Episode Reward: 10  | Average Reward 8.66  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0008  | Total Loss: 0.01 | Total Steps: 31
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 9/100  | Episode Reward: 10  | Average Reward 8.71  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0002  | Total Loss: 0.00 | Total Steps: 6
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 10/100  | Episode Reward: 10  | Average Reward 8.74  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0001  | Total Loss: 0.01 | Total Steps: 6
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 11/100  | Episode Reward: 10  | Average Reward 8.74  | Actor loss: 0.00 | Critic loss: 0.12 | Entropy loss: -0.0016  | Total Loss: 0.12 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 34/100  | Episode Reward: 10  | Average Reward 7.89  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0122  | Total Loss: -0.01 | Total Steps: 32
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 35/100  | Episode Reward: 10  | Average Reward 7.92  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0021  | Total Loss: 0.00 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 36/100  | Episode Reward: 10  | Average Reward 7.92  | Actor loss: 0.01 | Critic loss: 0.57 | Entropy loss: -0.0053  | Total Loss: 0.58 | Total Steps: 49
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 37/100  | Episode Reward: 2  | Aver

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 64/100  | Episode Reward: 10  | Average Reward 8.12  | Actor loss: 0.11 | Critic loss: 0.22 | Entropy loss: -0.0015  | Total Loss: 0.33 | Total Steps: 36
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 65/100  | Episode Reward: 8  | Average Reward 8.09  | Actor loss: 0.00 | Critic loss: 0.69 | Entropy loss: -0.0071  | Total Loss: 0.68 | Total Steps: 47
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 66/100  | Episode Reward: 10  | Average Reward 8.09  | Actor loss: 0.00 | Critic loss: 0.07 | Entropy loss: -0.0009  | Total Loss: 0.07 | Total Steps: 36
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 67/100  | Episode Reward: 8  | Average Reward 8.06  | Actor loss: 

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 93/100  | Episode Reward: 5  | Average Reward 7.74  | Actor loss: 0.00 | Critic loss: 0.27 | Entropy loss: -0.0010  | Total Loss: 0.27 | Total Steps: 63
TEST: ---black sphere---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 94/100  | Episode Reward: 5  | Average Reward 7.69  | Actor loss: 0.03 | Critic loss: 0.59 | Entropy loss: -0.0051  | Total Loss: 0.61 | Total Steps: 57
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 10  | Average Reward 7.69  | Actor loss: 0.00 | Critic loss: 0.62 | Entropy loss: -0.0022  | Total Loss: 0.61 | Total Steps: 31
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2926/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: 0.00 | Critic loss: 0.41 | Entropy loss: -0.0000  | Total Loss: 0.41 | Total Steps: 31

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2927/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: -0.01 | Critic loss: 0.41 | Entropy loss: -0.0008  | Total Loss: 0.40 | Total Steps: 41

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2928/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2929/68000  | Episode Reward: 10  | Average Reward 9.77  | Actor loss: 0.01 | Critic loss: 0.21 | Entropy loss: -0.0002  | Total Loss: 0.22 | Total Steps: 41

---red cube---
Agent i

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2959/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: -0.11 | Critic loss: 0.61 | Entropy loss: -0.0015  | Total Loss: 0.50 | Total Steps: 41

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2960/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: -0.00 | Critic loss: 0.67 | Entropy loss: -0.0001  | Total Loss: 0.67 | Total Steps: 31

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2961/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: -0.00 | Critic loss: 0.01 | Entropy loss: -0.0000  | Total Loss: 0.01 | Total Steps: 6

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2962/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: 0.00 | Critic loss: 0.11 | Entropy loss: -0.0002  | Total Loss: 0.11 | Total Steps: 36

---green cylinder---
Age

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2992/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: -0.00 | Critic loss: 0.38 | Entropy loss: -0.0000  | Total Loss: 0.38 | Total Steps: 31

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2993/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: 0.00 | Critic loss: 0.60 | Entropy loss: -0.0001  | Total Loss: 0.60 | Total Steps: 31

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2994/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: -0.08 | Critic loss: 0.29 | Entropy loss: -0.0017  | Total Loss: 0.21 | Total Steps: 41

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2995/68000  | Episode Reward: 10  | Average Reward 9.95  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---blue sphere-

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 23/100  | Episode Reward: 10  | Average Reward 8.35  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0004  | Total Loss: 0.03 | Total Steps: 6
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 24/100  | Episode Reward: 10  | Average Reward 8.35  | Actor loss: 0.00 | Critic loss: 0.69 | Entropy loss: -0.0006  | Total Loss: 0.69 | Total Steps: 6
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 25/100  | Episode Reward: 8  | Average Reward 8.35  | Actor loss: 0.00 | Critic loss: 0.06 | Entropy loss: -0.0127  | Total Loss: 0.04 | Total Steps: 38
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 26/100  | Episode Reward: 10  | Average Reward 8.35  | Actor loss: 0.00 | Critic loss: 0.60 | Entropy

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 200
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 300
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 52/100  | Episode Reward: -28  | Average Reward 8.35  | Actor loss: 2.05 | Critic loss: 5.67 | Entropy loss: -0.0078  | Total Loss: 7.71 | Total Steps: 389
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 53/100  | Episode Reward: 5  | Average Reward 8.38  | Actor loss: -0.00 | Critic loss: 0.02 | Entropy loss: -0.0375  | Total Loss: -0.02 | Total Steps: 50
TEST: ---black sphere---
TEST: Agent i

TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 81/100  | Episode Reward: 8  | Average Reward 8.35  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0047  | Total Loss: 0.05 | Total Steps: 46
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 82/100  | Episode Reward: 10  | Average Reward 8.38  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0149  | Total Loss: 0.03 | Total Steps: 41
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 83/100  | Episode Reward: 2  | Average Reward 8.32  | Actor loss: 0.00 | Critic loss: 0.15 | Entropy loss: -0.0091  | Total Loss: 0.14 | Total Steps: 52
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 84

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3011/68000  | Episode Reward: 10  | Average Reward 9.93  | Actor loss: -0.35 | Critic loss: 4.52 | Entropy loss: -0.0037  | Total Loss: 4.16 | Total Steps: 68

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3012/68000  | Episode Reward: 10  | Average Reward 9.93  | Actor loss: 0.00 | Critic loss: 0.03 | Entropy loss: -0.0000  | Total Loss: 0.03 | Total Steps: 6

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3013/68000  | Episode Reward: 10  | Average Reward 9.93  | Actor loss: 0.00 | Critic loss: 0.10 | Entropy loss: -0.0002  | Total Loss: 0.10 | Total Steps: 37

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3014/68000  | Episode Reward: 10  | Average Reward 9.93  | Actor loss: 0.00 | Critic loss: 0.23 | Entropy loss: -0.0001  | Total Loss: 0.23 | Total Steps: 36

---yellow cube---
Age

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3044/68000  | Episode Reward: 8  | Average Reward 9.88  | Actor loss: -0.02 | Critic loss: 2.63 | Entropy loss: -0.0012  | Total Loss: 2.60 | Total Steps: 40

---red sphere---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3045/68000  | Episode Reward: 8  | Average Reward 9.85  | Actor loss: -0.09 | Critic loss: 1.12 | Entropy loss: -0.0012  | Total Loss: 1.03 | Total Steps: 38

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3046/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0001  | Total Loss: 0.05 | Total Steps: 36

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3047/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.11 | Entropy loss: -0.0002  | Total Loss: 0.12 | Total

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3077/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.12 | Critic loss: 0.46 | Entropy loss: -0.0032  | Total Loss: 0.58 | Total Steps: 36

---black cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3078/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: -0.00 | Critic loss: 0.46 | Entropy loss: -0.0001  | Total Loss: 0.46 | Total Steps: 31

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3079/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.00 | Critic loss: 0.08 | Entropy loss: -0.0001  | Total Loss: 0.08 | Total Steps: 36

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3080/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.05 | Critic loss: 0.24 | Entropy loss: -0.0007  | Total Loss: 0.29 | Total Steps: 39

---red cylinder---
Ag

TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 9/100  | Episode Reward: 5  | Average Reward 8.12  | Actor loss: 0.00 | Critic loss: 1.42 | Entropy loss: -0.0043  | Total Loss: 1.42 | Total Steps: 49
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 10/100  | Episode Reward: 10  | Average Reward 8.12  | Actor loss: 0.00 | Critic loss: 1.64 | Entropy loss: -0.0164  | Total Loss: 1.62 | Total Steps: 42
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 11/100  | Episode Reward: 10  | Average Reward 8.15  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0095  | Total Loss: -0.01 | Total Steps: 32
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 12/100  | Episode Reward: 10  | Average Reward 8.18  | Actor loss: 0.

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode Reward: 10  | Average Reward 8.15  | Actor loss: 0.00 | Critic loss: 0.16 | Entropy loss: -0.0075  | Total Loss: 0.16 | Total Steps: 36
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 10  | Average Reward 8.15  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0013  | Total Loss: 0.00 | Total Steps: 6
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 42/100  | Episode Reward: 10  | Average Reward 8.15  | Actor loss: 0.02 | Critic loss: 2.08 | Entropy loss: -0.0073  | Total Loss: 2.09 | Total Steps: 39
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 10  | Average Reward 8.22  | Actor loss: 0.01 | Critic loss: 2.25 | Entropy loss: -0.0012  | Total Loss: 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 10  | Average Reward 8.55  | Actor loss: 0.01 | Critic loss: 2.21 | Entropy loss: -0.0073  | Total Loss: 2.22 | Total Steps: 66
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 10  | Average Reward 8.55  | Actor loss: -0.00 | Critic loss: 0.05 | Entropy loss: -0.0005  | Total Loss: 0.05 | Total Steps: 36
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 10  | Average Reward 8.57  | Actor loss: 0.00 | Critic loss: 0.05 | Entropy loss: -0.0037  | Total Loss: 0.05 | Total Steps: 41
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 73/100  | Episode Reward: 10  | Average Reward 8.57  | Actor loss: 0.01 | Critic loss: 0.52 | Entropy loss: -0.0079  | Total Loss: 0.5

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 10  | Average Reward 8.55  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0011  | Total Loss: -0.00 | Total Steps: 6
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 10  | Average Reward 8.55  | Actor loss: 0.01 | Critic loss: 1.88 | Entropy loss: -0.0098  | Total Loss: 1.89 | Total Steps: 39
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 100/100  | Episode Reward: 10  | Average Reward 8.55  | Actor loss: -0.00 | Critic loss: 0.03 | Entropy loss: -0.0019  | Total Loss: 0.03 | Total Steps: 36

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3102/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.00 | Critic loss: 0.59 | Entropy loss: -0.0001  | Total Loss: 0.59 | To

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3132/68000  | Episode Reward: 10  | Average Reward 9.65  | Actor loss: -0.00 | Critic loss: 0.27 | Entropy loss: -0.0003  | Total Loss: 0.26 | Total Steps: 41

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3133/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: 0.00 | Critic loss: 0.18 | Entropy loss: -0.0004  | Total Loss: 0.18 | Total Steps: 41

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3134/68000  | Episode Reward: 10  | Average Reward 9.68  | Actor loss: -0.00 | Critic loss: 0.38 | Entropy loss: -0.0001  | Total Loss: 0.38 | Total Steps: 31

---yellow cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3135/68000  | Episode Reward: 10  | Average Reward 9.70  | Actor loss: 0.01 | Critic loss: 0.25 | Entropy loss: -0.0002  | Total Loss: 0.26 | Total Steps: 41

---yellow cyl

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3165/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.00 | Critic loss: 0.21 | Entropy loss: -0.0004  | Total Loss: 0.21 | Total Steps: 41

---green cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3166/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: 0.03 | Critic loss: 0.49 | Entropy loss: -0.0005  | Total Loss: 0.52 | Total Steps: 42

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3167/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.00 | Critic loss: 0.44 | Entropy loss: -0.0000  | Total Loss: 0.44 | Total Steps: 31

---green capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3168/68000  | Episode Reward: 10  | Average Reward 9.72  | Actor loss: -0.01 | Critic loss: 0.23 | Entropy loss: -0.0008  | Total Loss: 0.22 | Total Steps: 41

---black cyli

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3198/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3199/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.30 | Entropy loss: -0.0000  | Total Loss: 0.30 | Total Steps: 31

---red prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3200/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: 0.00 | Critic loss: 0.75 | Entropy loss: -0.0000  | Total Loss: 0.75 | Total Steps: 31

---black capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3201/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: -0.01 | Critic loss: 0.34 | Entropy loss: -0.0014  | Total Loss: 0.32 | Total Steps: 41
TEST: ---blue cyli

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 26/100  | Episode Reward: 10  | Average Reward 8.12  | Actor loss: 0.01 | Critic loss: 2.18 | Entropy loss: -0.0010  | Total Loss: 2.18 | Total Steps: 36
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 27/100  | Episode Reward: 10  | Average Reward 8.12  | Actor loss: 0.00 | Critic loss: 0.03 | Entropy loss: -0.0043  | Total Loss: 0.03 | Total Steps: 99
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 28/100  | Episode Reward: 5  | Average Reward 8.07  | Actor loss: 0.00 | Critic loss: 0.16 | Entropy loss: -0.0026  | Total Loss: 0.16 | Total Steps: 47
TEST: ---blue cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 10  | Average Reward 8.07  | Actor loss: 0

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: 10  | Average Reward 8.22  | Actor loss: 0.01 | Critic loss: 1.24 | Entropy loss: -0.0025  | Total Loss: 1.25 | Total Steps: 66
TEST: ---red capsule---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 57/100  | Episode Reward: 5  | Average Reward 8.18  | Actor loss: 0.00 | Critic loss: 0.80 | Entropy loss: -0.0067  | Total Loss: 0.79 | Total Steps: 42
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 58/100  | Episode Reward: 10  | Average Reward 8.18  | Actor loss: 0.00 | Critic loss: 1.33 | Entropy loss: -0.0022  | Total Loss: 1.32 | Total Steps: 6
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 59/100  | Episode Reward: 10  | Average Reward 8.25  | Actor loss: -0.

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 86/100  | Episode Reward: 10  | Average Reward 8.53  | Actor loss: 0.00 | Critic loss: 1.09 | Entropy loss: -0.0007  | Total Loss: 1.09 | Total Steps: 36
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 87/100  | Episode Reward: 10  | Average Reward 8.53  | Actor loss: 0.00 | Critic loss: 0.28 | Entropy loss: -0.0002  | Total Loss: 0.28 | Total Steps: 6
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 88/100  | Episode Reward: 10  | Average Reward 8.55  | Actor loss: 0.00 | Critic loss: 0.00 | Entropy loss: -0.0001  | Total Loss: 0.00 | Total Steps: 6
TEST: ---red capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 89/100  | Episode Reward: 10  | Average Reward 8.55  | Actor loss: 0.00 | Critic loss: 0.01 | Entropy loss: -0.0001  | Total Loss: 0.01

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3219/68000  | Episode Reward: 10  | Average Reward 9.78  | Actor loss: -0.00 | Critic loss: 0.15 | Entropy loss: -0.0001  | Total Loss: 0.15 | Total Steps: 41

---green prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3220/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.10 | Entropy loss: -0.0002  | Total Loss: 0.10 | Total Steps: 37

---blue cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3221/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.19 | Entropy loss: -0.0000  | Total Loss: 0.19 | Total Steps: 36

---blue sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3222/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: -0.01 | Critic loss: 0.21 | Entropy loss: -0.0003  | Total Loss: 0.21 | Total Steps: 41

---red cube---
Agent i

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3252/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: 0.02 | Critic loss: 0.20 | Entropy loss: -0.0006  | Total Loss: 0.22 | Total Steps: 39

---red sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3253/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: 0.00 | Critic loss: 0.45 | Entropy loss: -0.0000  | Total Loss: 0.45 | Total Steps: 31

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3254/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: -0.23 | Critic loss: 2.28 | Entropy loss: -0.0015  | Total Loss: 2.04 | Total Steps: 66

---blue capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3255/68000  | Episode Reward: 10  | Average Reward 9.82  | Actor loss: -0.00 | Critic loss: 0.00 | Entropy loss: -0.0000  | Total Loss: 0.00 | Total Steps: 6

---blue prism---
A

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3285/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.22 | Critic loss: 0.21 | Entropy loss: -0.0028  | Total Loss: 0.42 | Total Steps: 39

---yellow sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3286/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: -0.00 | Critic loss: 0.31 | Entropy loss: -0.0002  | Total Loss: 0.31 | Total Steps: 41

---yellow cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3287/68000  | Episode Reward: 10  | Average Reward 9.85  | Actor loss: 0.00 | Critic loss: 0.84 | Entropy loss: -0.0001  | Total Loss: 0.85 | Total Steps: 31

---black prism---
Decision Step reward: -2.5
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3288/68000  | Episode Reward: 8  | Average Reward 9.82  | Actor loss: -0.01 | Critic loss: 2.15 | Entropy loss: -0.0001  | Total Loss: 2.14 | Total Ste

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 15/100  | Episode Reward: 10  | Average Reward 8.90  | Actor loss: 0.00 | Critic loss: 1.37 | Entropy loss: -0.0225  | Total Loss: 1.34 | Total Steps: 35
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 16/100  | Episode Reward: 10  | Average Reward 8.90  | Actor loss: 0.00 | Critic loss: 0.16 | Entropy loss: -0.0029  | Total Loss: 0.16 | Total Steps: 41
TEST: ---green cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 17/100  | Episode Reward: 10  | Average Reward 8.95  | Actor loss: 0.00 | Critic loss: 0.29 | Entropy loss: -0.0010  | Total Loss: 0.29 | Total Steps: 31
TEST: ---green cube---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 5  | Average Reward 8.90  | Actor loss: -0.

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 45/100  | Episode Reward: 8  | Average Reward 9.03  | Actor loss: 0.01 | Critic loss: 0.09 | Entropy loss: -0.0019  | Total Loss: 0.10 | Total Steps: 29
TEST: ---yellow prism---
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 46/100  | Episode Reward: 8  | Average Reward 9.00  | Actor loss: 0.00 | Critic loss: 0.74 | Entropy loss: -0.0149  | Total Loss: 0.72 | Total Steps: 41
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 47/100  | Episode Reward: 10  | Average Reward 9.05  | Actor loss: 0.05 | Critic loss: 7.46 | Entropy loss: -0.0216  | Total Loss: 7.49 | Total Steps: 66
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 48/100  | Episode Reward: 10  | Average Reward 9.07  | Actor loss: 0.14 | Critic loss: 0.31 | Entrop

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 75/100  | Episode Reward: 10  | Average Reward 9.00  | Actor loss: 0.01 | Critic loss: 0.36 | Entropy loss: -0.0006  | Total Loss: 0.37 | Total Steps: 36
TEST: ---yellow prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 76/100  | Episode Reward: 10  | Average Reward 9.00  | Actor loss: 0.00 | Critic loss: 0.80 | Entropy loss: -0.0074  | Total Loss: 0.80 | Total Steps: 36
TEST: ---black sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 77/100  | Episode Reward: 10  | Average Reward 9.00  | Actor loss: 0.01 | Critic loss: 0.19 | Entropy loss: -0.0070  | Total Loss: 0.19 | Total Steps: 69
TEST: ---blue cylinder---
TEST: Decision Step reward: -2.5
TEST: Decision Step reward: -2.5
TEST: Step: 100
TEST: Decision Step reward: -2.5
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 78/100  | Episo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3307/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.01 | Critic loss: 0.83 | Entropy loss: -0.0001  | Total Loss: 0.84 | Total Steps: 31

---black cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3308/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.04 | Critic loss: 0.18 | Entropy loss: -0.0009  | Total Loss: 0.14 | Total Steps: 38

---red cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3309/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: 0.00 | Critic loss: 0.12 | Entropy loss: -0.0001  | Total Loss: 0.12 | Total Steps: 36

---blue prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3310/68000  | Episode Reward: 10  | Average Reward 9.80  | Actor loss: -0.00 | Critic loss: 0.13 | Entropy loss: -0.0003  | Total Loss: 0.12 | Total Steps: 37

---yellow capsule---


In [9]:

data = {
            'all_average_reward': all_average_reward,
            'all_episode_reward': all_episode_reward,
            'all_actor_loss': all_actor_loss,
            'all_critic_loss': all_critic_loss,
            'all_entropy_loss': all_entropy_loss,
            'all_total_loss': all_total_loss,
            'all_steps': all_steps,
        } 
file_path = f'result/{ALG_NAME}_{ENV_ID}_train.txt'
with open(file_path, 'w') as file:
    json.dump(data, file)

test_data = {
            'all_average_reward': test_average_reward,
            'all_episode_reward': test_episode_reward,
            'all_actor_loss': test_actor_loss,
            'all_critic_loss': test_critic_loss,
            'all_entropy_loss': test_entropy_loss,
            'all_total_loss': test_total_loss,
            'all_steps': test_steps,
        } 
file_path = f'result/{ALG_NAME}_{ENV_ID}_test.txt'
with open(file_path, 'w') as file:
    json.dump(test_data, file)

