In [1]:
import mlagents
from mlagents_envs.environment import UnityEnvironment as UE
import numpy as np
from mlagents_envs.environment import ActionTuple


In [2]:
filename1="C:\\Users\\Palaash.HPZ\\Desktop\\RL-concept-learning_large_build_envs\\build_envs\\windows\\S1a_train_260723\\build"
filename2="C:\\Users\\Palaash.HPZ\\Desktop\\RL-concept-learning_large_build_envs\\build_envs\\windows\\S1a_test_260723\\build"
filename3="C:\\Users\\Palaash.HPZ\\Desktop\\RL-concept-learning_large_build_envs\\build_envs\\windows\\S1b_train_260723\\build"
filename4="C:\\Users\\Palaash.HPZ\\Desktop\\RL-concept-learning_large_build_envs\\build_envs\\windows\\S1b_test_260723\\build"



env1_train =  UE(file_name=filename1,seed=1,side_channels=[],worker_id=0,no_graphics = False)
env1_train.reset()
env1_test =  UE(file_name=filename2,seed=1,side_channels=[],worker_id=1,no_graphics = False)
env1_test.reset()
env2_train =  UE(file_name=filename3,seed=1,side_channels=[],worker_id=4,no_graphics = False)
env2_train.reset()
env2_test =  UE(file_name=filename4,seed=1,side_channels=[],worker_id=7,no_graphics = False)
env2_test.reset()

In [3]:
#
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

vision_output_dim = 3136
num_words = 35  # Number of unique words in the vocabulary
language_output_dim = 128
embedding_dim = 128
mixing_dim = 256
lstm_hidden_dim = 256
num_actions = 4

# (3,128,128) --> (64,7,7) = 3136 (3-layer CNN)
class VisualModule(nn.Module): 
    def __init__(self):
        super(VisualModule, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=3, padding=0),
            nn.ReLU()
        )
        # self.conv = nn.Sequential(
        #     nn.Conv2d(3, 32, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 128, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(128, 64, kernel_size=5, stride=2, padding=1),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        # )

    def forward(self, vt):
        encoded_vt = self.conv(vt)
        return encoded_vt.view(vt.size(0), -1).squeeze()

# one-hot encoding [0 0 1 0 0] --> 128 dimensional embedding (FF)
# S1:5 S2:5 S3:11 S4:9 --> 30 + 5 (noun) = 35 in total
class LanguageModule(nn.Module): 
    def __init__(self, num_words, embedding_dim):
        super(LanguageModule, self).__init__()
        self.embedding = nn.Linear(num_words, embedding_dim)

    def forward(self, lt):
        embedded_lt = self.embedding(lt)
        return embedded_lt

# 3136(vision) + 128 (language) --> 256 dimensional embedding (FF)
class MixingModule(nn.Module):
    def __init__(self, vision_output_dim, language_output_dim, mixing_dim):
        super(MixingModule, self).__init__()
        self.linear = nn.Linear(vision_output_dim + language_output_dim, mixing_dim)

    def forward(self, vision_output, language_output):
        combined_output = torch.cat((vision_output, language_output), dim=0)
        mixed_output = self.linear(combined_output)
        return mixed_output

class LSTMModule(nn.Module):
    def __init__(self,mixing_dim,lstm_hidden_dim):
        super(LSTMModule, self).__init__()
        self.lstm = nn.LSTMCell(mixing_dim, lstm_hidden_dim)
    
    def forward(self,mixed_output,lstm_hidden_state):
        lstm_hidden_state = self.lstm(mixed_output, lstm_hidden_state) 
        # lstm_output = lstm_hidden_state[0] # output is (hidden_state,cell_state), we need hidden state, shape (1,256)
        return lstm_hidden_state

class Agent(nn.Module):
    def __init__(self, num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions):
        super(Agent, self).__init__()
        self.language_module = LanguageModule(num_words, embedding_dim)
        self.visual_module = VisualModule()
        self.mixing_module = MixingModule(vision_output_dim, language_output_dim, mixing_dim)
        self.lstm_module = LSTMModule(mixing_dim, lstm_hidden_dim)
        self.action_predictor = nn.Linear(lstm_hidden_dim, num_actions)
        self.value_estimator = nn.Linear(lstm_hidden_dim, 1)

    def forward(self, vt, lt, lstm_hidden_state):
        vision_output = self.visual_module(vt)
        language_output = self.language_module(lt)
        mixed_output = self.mixing_module(vision_output, language_output).unsqueeze(0)
        lstm_output = self.lstm_module(mixed_output,lstm_hidden_state)
        action_probs = self.action_predictor(lstm_output[0]) 
        value_estimate = self.value_estimator(lstm_output[0])
        return action_probs,value_estimate,lstm_output
        
        
    def save(self, episode, ALG_NAME, ENV_ID):
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        if not os.path.exists(path):
            os.makedirs(path)
        torch.save(self.state_dict(), os.path.join(path, f'agent_{episode}.pt'))

    def load(self, episode, ALG_NAME, ENV_ID):
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        self.load_state_dict(torch.load(os.path.join(path, f'agent_{episode}.pt')))    

        


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def test(agent,test_episode,test_episode_reward,test_average_reward,test_steps,test_actor_loss,test_critic_loss,test_entropy_loss,test_total_loss):
    TEST_EPISODES = 100
    tracked_agent = -1
    entropy_term = 0
    for episode in range(TEST_EPISODES):
        test_episode += 1
        t0 = time.time()
        episode_reward = 0
        # env.reset()
        env_index = (test_episode // env_per_iteration) % 2
        if env_index == 0: env = env1_test
        else: env = env2_test
        behavior_name=list(env.behavior_specs)[0]
        spec=env.behavior_specs[behavior_name]
        # state = env.reset().astype(np.float32)
        STEPS = 0
        decision_steps, terminal_steps = env.get_steps(behavior_name)
        # state -- vt, lt, lstm
        vt = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
        index = int(decision_steps.obs[1])
        if env_index: index = index + 5
        print(f'TEST: ---{hashmap[index]}---')

        lt = torch.eye(num_words)[:, index].to(device)
        lstm_hidden_state = (torch.zeros(1, lstm_hidden_dim).to(device), torch.zeros(1, lstm_hidden_dim).to(device))
        done = False
        while True:

            # Need to use when calculating the loss
            log_probs = []
            # values = []
            values = torch.empty(0).to(device)
            rewards = []

            
            lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
            STEPS += 1
            policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
            # value = value.detach()
            dist = F.softmax(policy_dist.detach(),dim=1).cpu().numpy()
            

            action_dist = Categorical(F.softmax(policy_dist.detach(),dim=1))
            # action_dist = Categorical(F.softmax(policy_dist,dim=1))
            action = action_dist.sample() # sample an action from action_dist
            action_onehot = F.one_hot(torch.tensor(action),num_actions).cpu()
            
            log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
            # log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
            # entropy = -np.sum(np.mean(dist)* np.log(dist))
            entropy = F.cross_entropy(policy_dist.detach(), action)

            discrete_actions = np.array(action_onehot).reshape(1,4)*speed
            action_tuple = ActionTuple()
            action_tuple.add_discrete(discrete_actions)
            env.set_actions(behavior_name,action_tuple)
            env.step()
            decision_steps, terminal_steps = env.get_steps(behavior_name)

            if tracked_agent == -1 and len(decision_steps) >= 1:
                tracked_agent = decision_steps.agent_id[0]
                # print(tracked_agent)

            if tracked_agent in terminal_steps: # roll over or hit the target
                print('TEST: Agent in terminal steps')
                done = True
                reward = terminal_steps[tracked_agent].reward
                if reward > 0:
                    pass
                else: reward = -1 # roll over or other unseen conditions

                print(f'TEST: Terminal Step reward: {reward}')

            elif tracked_agent in decision_steps: # the agent which requires action
                reward = decision_steps[tracked_agent].reward
                # print(f'Decision Step reward: {reward}')
                if reward<0:
                    print(f'TEST: Decision Step reward: {reward}')
            if STEPS >= MAX_STEPS:
                reward = -10
                print(f'TEST: Max Step Reward: {reward}')
                env.reset()
                done = True
            if STEPS % 100 == 0:
                print (f'TEST: Step: {STEPS}')

            episode_reward = episode_reward + reward

            rewards.append(reward)
            # values.append(value)
            values = torch.cat((values, value), dim=0)
            log_probs.append(log_prob)
            entropy_term = entropy_term + entropy
            vt_new = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
            vt = vt_new

            if done:
                # _, Qval,_ = agent(vt_new,lt,lstm_hidden_state)
                # Qval = Qval.detach()
                break
            
            
        discounted_rewards = np.zeros_like(values.cpu().detach().numpy())
        cumulative = 0
        for t in reversed(range(len(rewards))):
            cumulative = rewards[t] + LAM * cumulative # Monte Carlo
            discounted_rewards[t] = cumulative
        # print(f'rewards:{rewards}, discounted_rewards:{discounted_rewards}')
        # Advantage Actor Critic

        # Qvals[-1] = rewards[t] + LAM * Qval      or       Qvals[-1] = rewards[t]                   
        # for t in range(len(rewards)-1):
        #         Qvals[t] = rewards[t] + LAM * values[t+1]
        
        # r_(t+1) = R(s_t|a_t)--> reward[t]        a_t, V_t = agent(s_t)
        # A_t = r_(t+1) + LAM * V_(t+1) - V_t 
        #     = Q_t - V_t
        
        # Monte Carlo Advantage = reward + LAM * cumulative_reward
        # Actor_loss = -log(pai(s_t|a_t))*A_t
        # Critic_loss = A_t.pow(2) *0.5
        # Entropy_loss = -F.entropy(pai(St),index) * 0.001

        # entropy = -np.sum(np.mean(dist) * np.log(dist))
        
        #update actor critic
        
        # values = torch.FloatTensor(values).requires_grad_(True).to(device)
        discounted_rewards = torch.FloatTensor(discounted_rewards.astype(np.float32)).to(device)
        log_probs = torch.stack(log_probs)
        advantage = discounted_rewards - values
        actor_loss = (-log_probs * advantage).mean()
        critic_loss = 0.5 * torch.square(advantage).mean()
        entropy_term /= STEPS
        entropy_loss = -0.1 * entropy_term
        ac_loss = actor_loss + critic_loss + entropy_loss
        test_episode_reward.append(float(episode_reward))
        test_steps.append(STEPS)
        test_actor_loss.append(float(actor_loss))
        test_critic_loss.append(float(critic_loss))
        test_entropy_loss.append(float(entropy_loss))
        test_total_loss.append(float(ac_loss))

        if test_episode >= 200:
            avg_score = np.mean(test_episode_reward[-200:])
            test_average_reward.append(avg_score)
            print('Testing  | Episode: {}/{}  | Episode Reward: {:.0f}  | Average Reward {:.2f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                .format(episode + 1, TEST_EPISODES, episode_reward, avg_score, actor_loss, critic_loss,entropy_loss,  ac_loss, STEPS))
        else:  print('Testing  | Episode: {}/{}  | Episode Reward: {:.0f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                .format(episode + 1, TEST_EPISODES, episode_reward, actor_loss, critic_loss, entropy_loss,  ac_loss, STEPS))
    return test_episode,test_episode_reward,test_average_reward,test_steps,test_actor_loss,test_critic_loss,test_entropy_loss,test_total_loss
                

In [None]:
#
import argparse
import time
import json
# import matplotlib.pyplot as plt
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical

entropy_term = 0
# add arguments in command --train/test
# parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
# parser.add_argument('--train', dest='train', action='store_true', default=False)
# parser.add_argument('--test', dest='test', action='store_true', default=True)
# args = parser.parse_args()
train = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device) 

S0_ALG_NAME = 'S1ab_final'
S0_ENV_ID = '3'
S0_episode = 32000

ALG_NAME = 'S1ab_final'
ENV_ID = '4'
TRAIN_EPISODES = 129000  # number of overall episodes for training  # number of overall episodes for testing
MAX_STEPS = 500  # maximum time step in one episode
LAM = 0.95  # reward discount in TD error
env_per_iteration = 100
lr = 2.5e-5  #0.00005 
speed = 3
num_steps = 250 # the step for updating the network
test_episode = 0
if __name__ == '__main__':
    agent = Agent(num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions)
    agent.load(S0_episode,S0_ALG_NAME,S0_ENV_ID)
    agent.to(device)
    optimizer = optim.RMSprop(agent.parameters(), lr=lr)
    best_score = float('-inf')
    hashmap = {
        0: 'capsule',
        1: 'cube',
        2: 'cylinder',
        3: 'prism',
        4: 'sphere',
        5: 'red',
        6: 'green',
        7: 'blue',
        8: 'yellow',
        9: 'black'}
    if train:
        entropy_term = 0
        test_episode_reward = []
        test_average_reward = []
        test_steps = []
        test_actor_loss = []
        test_critic_loss = []
        test_entropy_loss = []
        test_total_loss = []
        tracked_agent = -1
        test_episode = 0
        all_episode_reward = []
        all_average_reward = []
        all_steps = []
        all_actor_loss = []
        all_critic_loss = []
        all_entropy_loss = []
        all_total_loss = []
        for episode in range(TRAIN_EPISODES):
            t0 = time.time()
            episode_reward = 0
            # env.reset()
            env_index = (episode // env_per_iteration) % 2
            if env_index == 0: env = env1_train
            else: env = env2_train
            behavior_name=list(env.behavior_specs)[0]
            spec=env.behavior_specs[behavior_name]
            # state = env.reset().astype(np.float32)
            STEPS = 0

            decision_steps, terminal_steps = env.get_steps(behavior_name)
            # state -- vt, lt, lstm
            vt = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
            index = int(decision_steps.obs[1])
            if env_index: index = index + 5
            print(f'---{hashmap[index]}---')

            lt = torch.eye(num_words)[:, index].to(device)
            lstm_hidden_state = (torch.zeros(1, lstm_hidden_dim).to(device), torch.zeros(1, lstm_hidden_dim).to(device))
            done = False
            while True:

                # Need to use when calculating the loss
                log_probs = []
                # values = []
                values = torch.empty(0).to(device)
                rewards = []

                for steps in range(num_steps):
                    lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
                    STEPS += 1
                    policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
                    # value = value.detach()
                    dist = F.softmax(policy_dist.detach(),dim=1).cpu().numpy()
                    

                    action_dist = Categorical(F.softmax(policy_dist.detach(),dim=1))
                    # action_dist = Categorical(F.softmax(policy_dist,dim=1))
                    action = action_dist.sample() # sample an action from action_dist
                    action_onehot = F.one_hot(torch.tensor(action),num_actions).cpu()
                    
                    log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
                    # log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
                    # entropy = -np.sum(np.mean(dist)* np.log(dist))
                    entropy = F.cross_entropy(policy_dist.detach(), action)

                    discrete_actions = np.array(action_onehot).reshape(1,4)*speed
                    action_tuple = ActionTuple()
                    action_tuple.add_discrete(discrete_actions)
                    env.set_actions(behavior_name,action_tuple)
                    env.step()
                    decision_steps, terminal_steps = env.get_steps(behavior_name)

                    if tracked_agent == -1 and len(decision_steps) >= 1:
                        tracked_agent = decision_steps.agent_id[0]
                        # print(tracked_agent)

                    if tracked_agent in terminal_steps: # roll over or hit the target
                        print('Agent in terminal steps')
                        done = True
                        reward = terminal_steps[tracked_agent].reward
                        if reward > 0:
                            pass
                        else: reward = -1 # roll over or other unseen conditions

                        print(f'Terminal Step reward: {reward}')

                    elif tracked_agent in decision_steps: # the agent which requires action
                        reward = decision_steps[tracked_agent].reward
                        # print(f'Decision Step reward: {reward}')
                        if reward<0:
                            print(f'Decision Step reward: {reward}')
                    if STEPS >= MAX_STEPS:
                        reward = -10
                        print(f'Max Step Reward: {reward}')
                        env.reset()
                        done = True
                    if STEPS % num_steps == 0:
                        print (f'Step: {STEPS}')

                    episode_reward = episode_reward + reward

                    rewards.append(reward)
                    # values.append(value)
                    values = torch.cat((values, value), dim=0)
                    log_probs.append(log_prob)
                    entropy_term = entropy_term + entropy
                    vt_new = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
                    vt = vt_new

                    if done or steps == num_steps-1:
                        # _, Qval,_ = agent(vt_new,lt,lstm_hidden_state)
                        # Qval = Qval.detach()
                        break
                
                
                discounted_rewards = np.zeros_like(values.cpu().detach().numpy())
                cumulative = 0
                for t in reversed(range(len(rewards))):
                    cumulative = rewards[t] + LAM * cumulative # Monte Carlo
                    discounted_rewards[t] = cumulative
                # print(f'rewards:{rewards}, discounted_rewards:{discounted_rewards}')
                # Advantage Actor Critic

                # Qvals[-1] = rewards[t] + LAM * Qval      or       Qvals[-1] = rewards[t]                   
                # for t in range(len(rewards)-1):
                #         Qvals[t] = rewards[t] + LAM * values[t+1]
                
                # r_(t+1) = R(s_t|a_t)--> reward[t]        a_t, V_t = agent(s_t)
                # A_t = r_(t+1) + LAM * V_(t+1) - V_t 
                #     = Q_t - V_t
                
                # Monte Carlo Advantage = reward + LAM * cumulative_reward
                # Actor_loss = -log(pai(s_t|a_t))*A_t
                # Critic_loss = A_t.pow(2) *0.5
                # Entropy_loss = -F.entropy(pai(St),index) * 0.001

                # entropy = -np.sum(np.mean(dist) * np.log(dist))
                
                #update actor critic
                
                # values = torch.FloatTensor(values).requires_grad_(True).to(device)
                discounted_rewards = torch.FloatTensor(discounted_rewards.astype(np.float32)).to(device)
                log_probs = torch.stack(log_probs)
                advantage = discounted_rewards - values
                actor_loss = (-log_probs * advantage).mean()
                critic_loss = 0.5 * torch.square(advantage).mean()
                entropy_term /= num_steps
                entropy_loss = -0.1 * entropy_term
                ac_loss = actor_loss + critic_loss + entropy_loss
                # ac_loss = values.mean()
                optimizer.zero_grad()
                ac_loss.backward()
                optimizer.step()
                # for name, param in agent.named_parameters():
                #     if param.grad is not None:
                #         print(name, param.grad)
                #     else:
                #         print(name, "gradients not computed")
                # for name, param in agent.named_parameters():
                #     if name == 'value_estimator.weight':
                #         print(name, param)
                
                
                if done: break


            all_episode_reward.append(float(episode_reward))
            all_steps.append(STEPS)
            all_actor_loss.append(float(actor_loss))
            all_critic_loss.append(float(critic_loss))
            all_entropy_loss.append(float(entropy_loss))
            all_total_loss.append(float(ac_loss))
            if episode >= 200:
                avg_score = np.mean(all_episode_reward[-200:])
                all_average_reward.append(avg_score)
                if avg_score > best_score:
                    best_score = avg_score
                    agent.save(episode, ALG_NAME, ENV_ID)
                    print(f'-----The best score for averaging previous 200 episode reward is {best_score}. Model has been saved-----')
                print('Training  | Episode: {}/{}  | Episode Reward: {:.0f}  | Average Reward {:.2f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                    .format(episode + 1, TRAIN_EPISODES, episode_reward, avg_score, actor_loss, critic_loss,entropy_loss,  ac_loss, STEPS))
            else:  print('Training  | Episode: {}/{}  | Episode Reward: {:.0f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                    .format(episode + 1, TRAIN_EPISODES, episode_reward, actor_loss, critic_loss, entropy_loss,  ac_loss, STEPS))
            if episode%500 == 0:
                    agent.save(episode, ALG_NAME, ENV_ID)
                    print("Model has been saved")
            if episode%100 == 0:
                test_episode,test_episode_reward,test_average_reward,test_steps,test_actor_loss,test_critic_loss,test_entropy_loss,test_total_loss = test(agent,test_episode,test_episode_reward,test_average_reward,test_steps,test_actor_loss,test_critic_loss,test_entropy_loss,test_total_loss)

        print(all_average_reward)
        agent.save(episode ,ALG_NAME, ENV_ID)
        print("Model has been saved")

        data = {
                    'all_average_reward': all_average_reward,
                    'all_episode_reward': all_episode_reward,
                    'all_actor_loss': all_actor_loss,
                    'all_critic_loss': all_critic_loss,
                    'all_entropy_loss': all_entropy_loss,
                    'all_total_loss': all_total_loss,
                    'all_steps': all_steps,
                } 
        file_path = f'result/{ALG_NAME}_{ENV_ID}_train.txt'
        with open(file_path, 'w') as file:
            json.dump(data, file)
        
        test_data = {
                    'all_average_reward': test_average_reward,
                    'all_episode_reward': test_episode_reward,
                    'all_actor_loss': test_actor_loss,
                    'all_critic_loss': test_critic_loss,
                    'all_entropy_loss': test_entropy_loss,
                    'all_total_loss': test_total_loss,
                    'all_steps': test_steps,
                } 
        file_path = f'result/{ALG_NAME}_{ENV_ID}_test.txt'
        with open(file_path, 'w') as file:
            json.dump(test_data, file)



cuda
---prism---




Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1/129000  | Episode Reward: 10  | Actor loss: 0.40 | Critic loss: 5.58 | Entropy loss: -0.0002  | Total Loss: 5.98 | Total Steps: 6
Model has been saved
TEST: ---sphere---




TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 1/100  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 0.26 | Entropy loss: -0.0006  | Total Loss: 0.26 | Total Steps: 6
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 2/100  | Episode Reward: 4  | Actor loss: 0.06 | Critic loss: 1.19 | Entropy loss: -0.0134  | Total Loss: 1.24 | Total Steps: 49
TEST: ---sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 3/100  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 3.67 | Entropy loss: -0.0008  | Total Loss: 3.68 | Total Steps: 31
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 4/100  | Episode Reward: 4  | Actor loss: 0.01 | Critic loss: 11.45 | Entropy loss: -0.0

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 17/100  | Episode Reward: -44  | Actor loss: 0.36 | Critic loss: 5.66 | Entropy loss: -0.0081  | Total Loss: 6.01 | Total Steps: 262
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 1  | Actor loss: 0.09 | Critic loss: 11.24 | Entropy loss: -0.0139  | Total Loss: 11.32 | Total Steps: 57
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 19/100  | Episode Reward: 7  | Actor loss: 0.00 | Critic loss: 2.45 | Entropy loss: -0.0011  | Total Loss: 2.46 | Total Steps: 29
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 20/100  | Episode Reward: 7  | Actor loss: 0.01 | Cri

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 400
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 36/100  | Episode Reward: -121  | Actor loss: -0.05 | Critic loss: 130.08 | Entropy loss: -0.0050  | Total Loss: 130.03 | Total Steps: 500
TEST: ---sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 37/100  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 0.34 | Entropy loss: -0.0013  | Total Loss: 0.35 | Total Steps: 6
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testin

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 54/100  | Episode Reward: 4  | Actor loss: 0.01 | Critic loss: 9.01 | Entropy loss: -0.0005  | Total Loss: 9.02 | Total Steps: 49
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 55/100  | Episode Reward: 7  | Actor loss: 0.02 | Critic loss: 12.84 | Entropy loss: -0.0016  | Total Loss: 12.85 | Total Steps: 34
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: 7  | Actor loss: 0.01 | Critic loss: 13.00 | Entropy loss: -0.0006  | Total Loss: 13.01 | Total Steps: 29
TEST: ---capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 57/100  | Episode Reward: 10  | Actor loss: 0.03 | Critic loss: 18.95 | Entropy loss: -0.0011  | Total Loss: 18.97 | Total Steps: 6
TEST: ---capsule---

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 1  | Actor loss: 2.21 | Critic loss: 8.75 | Entropy loss: -0.0105  | Total Loss: 10.94 | Total Steps: 51
TEST: ---cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 5.96 | Entropy loss: -0.0021  | Total Loss: 5.96 | Total Steps: 6
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 1  | Actor loss: 0.01 | Critic loss: 7.17 | Entropy loss: -0.0008  | Total Loss: 7.18 | Total Steps: 53
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decis

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 96/100  | Episode Reward: 4  | Actor loss: 2.11 | Critic loss: 15.82 | Entropy loss: -0.0017  | Total Loss: 17.93 | Total Steps: 47
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 97/100  | Episode Reward: 7  | Actor loss: 0.01 | Critic loss: 11.98 | Entropy loss: -0.0006  | Total Loss: 11.99 | Total Steps: 29
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 4  | Actor loss: 0.01 | Critic loss: 6.04 | Entropy loss: -0.0011  | Total Loss: 6.05 | Total Steps: 42
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Rewa

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 30/129000  | Episode Reward: 4  | Actor loss: -0.00 | Critic loss: 7.34 | Entropy loss: -0.0001  | Total Loss: 7.33 | Total Steps: 46
---prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 31/129000  | Episode Reward: 10  | Actor loss: 0.35 | Critic loss: 4.62 | Entropy loss: -0.0014  | Total Loss: 4.96 | Total Steps: 31
---cube---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 32/129000  | Episode Reward: 7  | Actor loss: -0.03 | Critic loss: 6.24 | Entropy loss: -0.0004  | Total Loss: 6.21 | Total Steps: 30
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 33/129000  | Episode Reward: -8  | Actor loss: -0.39 | Critic loss: 29.18 | Entropy l

---sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 61/129000  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 1.11 | Entropy loss: -0.0000  | Total Loss: 1.12 | Total Steps: 6
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 62/129000  | Episode Reward: 10  | Actor loss: 0.03 | Critic loss: 4.51 | Entropy loss: -0.0001  | Total Loss: 4.53 | Total Steps: 29
---prism---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 63/129000  | Episode Reward: 4  | Actor loss: 0.30 | Critic loss: 10.35 | Entropy loss: -0.0069  | Total Loss: 10.64 | Total Steps: 58
---cylinder---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 64/129000  | Episode Reward: 7  | Actor loss: 0.06 | Critic loss: 5.12 | Entropy loss: -0.0003  | Total Loss: 5.18 | Total Steps: 29
---prism---
Agent in terminal steps
Termin

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 92/129000  | Episode Reward: 7  | Actor loss: 0.09 | Critic loss: 5.31 | Entropy loss: -0.0004  | Total Loss: 5.40 | Total Steps: 30
---sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 93/129000  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 2.08 | Entropy loss: -0.0000  | Total Loss: 2.08 | Total Steps: 6
---prism---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 94/129000  | Episode Reward: 4  | Actor loss: -0.03 | Critic loss: 8.46 | Entropy loss: -0.0023  | Total Loss: 8.43 | Total Steps: 44
---cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 95/129000  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 1.62 | Entropy loss: -0.0000  | Total Loss: 1.63 | Total Steps: 6
---prism---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal s

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 20/100  | Episode Reward: 0  | Actor loss: 0.02 | Critic loss: 1.80 | Entropy loss: -0.0372  | Total Loss: 1.79 | Total Steps: 64
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 21/100  | Episode Reward: 10  | Actor loss: 0.20 | Critic loss: 11.45 | Entropy loss: -0.0084  | Total Loss: 11.64 | Total Steps: 6
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 22/100  | Episode Reward: 4  | Actor loss: 0.01 | Critic loss: 3.37 | Entropy loss: -0.0096  | Total Loss: 3.37 | Total Steps: 45
TEST: ---red---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 23/100  | Episode Reward:

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 48/100  | Episode Reward: 1  | Actor loss: 0.00 | Critic loss: 7.11 | Entropy loss: -0.0017  | Total Loss: 7.11 | Total Steps: 53
TEST: ---black---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 49/100  | Episode Reward: 10  | Actor loss: 0.02 | Critic loss: 10.72 | Entropy loss: -0.0012  | Total Loss: 10.74 | Total Steps: 6
TEST: ---green---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 50/100  | Episode Reward: 10  | Actor loss: 0.00 | Critic loss: 2.57 | Entropy loss: -0.0007  | Total Loss: 2.57 | Total Steps: 6
TEST: ---yellow---
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 51/100  | Episode Reward: 5  | Actor loss: 0.10 | Critic loss: 5.44 | Entropy loss: -0.0299  | Total Loss: 5.51 | Total St

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 75/100  | Episode Reward: 1  | Actor loss: 0.41 | Critic loss: 19.35 | Entropy loss: -0.0193  | Total Loss: 19.74 | Total Steps: 51
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 76/100  | Episode Reward: 4  | Actor loss: 0.02 | Critic loss: 7.03 | Entropy loss: -0.0077  | Total Loss: 7.05 | Total Steps: 49
TEST: ---red---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 77/100  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 2.12 | Entropy loss: -0.0016  | Total Loss: 2.13 | Total Steps: 6
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 78/100  | Episode Reward: 7  | Actor loss: 0.61 | Critic los

Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -1.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 107/129000  | Episode Reward: 3  | Actor loss: -0.91 | Critic loss: 6.74 | Entropy loss: -0.0102  | Total Loss: 5.83 | Total Steps: 65
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 108/129000  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 1.06 | Entropy loss: -0.0001  | Total Loss: 1.06 | Total Steps: 6
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 109/129000  | Episode Reward: 10  | Actor loss: 0.01 | Critic loss: 1.04 | Entropy loss: -0.0000  | Total Loss: 1.04 | Total Steps: 6
---green---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 110/129000  | Episode Reward: 7  | Actor loss: -0.04 | Critic loss: 6.92 | Entropy loss: -0.0034  | Total Loss: 6.87 | Total Steps: 45
---blue---
Agent in terminal s

---green---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 141/129000  | Episode Reward: 4  | Actor loss: 0.00 | Critic loss: 8.13 | Entropy loss: -0.0013  | Total Loss: 8.13 | Total Steps: 46
---yellow---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 142/129000  | Episode Reward: 4  | Actor loss: 0.01 | Critic loss: 9.43 | Entropy loss: -0.0106  | Total Loss: 9.44 | Total Steps: 59
---blue---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 143/129000  | Episode Reward: 1  | Actor loss: -0.18 | Critic loss: 8.58 | Entropy loss: -0.0008  | Total Loss: 8.40 | Total Steps: 52
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 144/129000  | Episode Reward: 10  | Actor loss: 0.03 | Critic loss: 0.8

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 175/129000  | Episode Reward: 10  | Actor loss: 0.15 | Critic loss: 5.48 | Entropy loss: -0.0039  | Total Loss: 5.63 | Total Steps: 36
---blue---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 176/129000  | Episode Reward: 1  | Actor loss: -0.04 | Critic loss: 12.62 | Entropy loss: -0.0004  | Total Loss: 12.58 | Total Steps: 53
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 177/129000  | Episode Reward: 4  | Actor loss: -0.02 | Critic loss: 7.27 | Entropy loss: -0.0021  | Total Loss: 7.24 | Total Steps: 55
---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 178/129000  | Episode Reward: 10  | Actor loss: 0.06 | Critic loss: 4.43 | Entropy loss: -0.0046  | Total Loss: 4.48 | Total Steps: 38
--

TEST: Step: 100
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 5/100  | Episode Reward: 4  | Average Reward -1.96  | Actor loss: 0.01 | Critic loss: 1.54 | Entropy loss: -0.0059  | Total Loss: 1.55 | Total Steps: 230
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 6/100  | Episode Reward: 4  | Average Reward -1.36  | Actor loss: 2.58 | Critic loss: 19.81 | Entropy loss: -0.0098  | Total Loss: 22.39 | Total Steps: 48
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 7/100  | Episode Reward: -2  | Average Reward -1.38  | Actor loss: 0.10 | Critic loss: 29.52 | Entropy los

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 27/100  | Episode Reward: 7  | Average Reward -0.69  | Actor loss: 0.00 | Critic loss: 2.66 | Entropy loss: -0.0035  | Total Loss: 2.66 | Total Steps: 34
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 28/100  | Episode Reward: 4  | Average Reward -0.69  | Actor loss: 0.45 | Critic loss: 10.93 | Entropy loss: -0.0236  | Total Loss: 11.36 | Total Steps: 43
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 4  | Average Reward -0.68  | Actor loss: 0.06 | Critic loss: 28.98 | Entropy loss: -0.0337  | Total Loss: 29.00 | Total Steps: 68
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward:

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: -8  | Average Reward 1.65  | Actor loss: 0.07 | Critic loss: 10.24 | Entropy loss: -0.0086  | Total Loss: 10.30 | Total Steps: 107
TEST: ---cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 57/100  | Episode Reward: 10  | Average Reward 1.65  | Actor loss: 0.00 | Critic loss: 2.10 | Entropy loss: -0.0018  | Total Loss: 2.10 | Total Steps: 6
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 58/100  | Episode Reward: 4  | Average Reward 1.66  | Actor loss: 0.00 | Critic loss: 2.28 | Entropy loss: -0.0286  | Total Los

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 78/100  | Episode Reward: 4  | Average Reward 3.40  | Actor loss: 1.44 | Critic loss: 20.83 | Entropy loss: -0.0196  | Total Loss: 22.25 | Total Steps: 51
TEST: ---sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 79/100  | Episode Reward: 10  | Average Reward 3.42  | Actor loss: 10.91 | Critic loss: 19.26 | Entropy loss: -0.0472  | Total Loss: 30.12 | Total Steps: 6
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 7  | Average Reward 3.44  | Actor loss: 0.01 | Critic loss: 14.96 | Entropy loss: -0.0025  | Total Loss: 14.97 | Total Steps: 29
TEST: ---cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 81/100  | Episode Reward: 10  | Average Reward 3.45  | Actor loss: 1.45 | Cr

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 4  | Average Reward 2.40  | Actor loss: 0.18 | Critic loss: 15.06 | Entropy loss: -0.0007  | Total Loss: 15.24 | Total Steps: 47
TEST: ---red---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 100/100  | Episode Reward: 10  | Average Reward 2.40  | Actor loss: 0.01 | Critic loss: 1.76 | Entropy loss: -0.0005  | Total Loss: 1.77 | Total Steps: 6
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 5.85. Model has been saved-----
Training  | Episode: 202/129000  | Episode Reward: 10  | Average Reward 5.85  | Actor loss: 0.01 | Critic loss: 3.70 | Entropy loss: -0.0000  | Total Loss: 3.70 | Total Steps: 6
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 5.91. Model 

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 228/129000  | Episode Reward: 4  | Average Reward 5.58  | Actor loss: -0.02 | Critic loss: 6.80 | Entropy loss: -0.0011  | Total Loss: 6.78 | Total Steps: 44
---cube---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 229/129000  | Episode Reward: 7  | Average Reward 5.58  | Actor loss: -0.99 | Critic loss: 7.86 | Entropy loss: -0.0064  | Total Loss: 6.86 | Total Steps: 44
---cube---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 230/129000  | Episode Reward: 4  | Average Reward 5.58  | Actor loss: -0.00 | Critic loss: 7.63 | Entropy loss: -0.0006  | Total Loss: 7.63 | Total Steps: 53
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 231/129000  | Episode Reward: 10  | Average Reward 5.58  | Actor loss: 1.24 | Critic loss: 10.

Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 6.18. Model has been saved-----
Training  | Episode: 257/129000  | Episode Reward: 10  | Average Reward 6.18  | Actor loss: 0.04 | Critic loss: 2.60 | Entropy loss: -0.0003  | Total Loss: 2.64 | Total Steps: 34
---cube---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 258/129000  | Episode Reward: 1  | Average Reward 6.13  | Actor loss: -0.14 | Critic loss: 14.02 | Entropy loss: -0.0009  | Total Loss: 13.87 | Total Steps: 52
---sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 259/129000  | Episode Reward: 10  | Average Reward 6.17  | Actor loss: 0.56 | Critic loss: 3.83 | Entropy loss: -0.0012  | Total Loss: 4.38 | Total Steps: 13
---sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 260/129000  | Episode

Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 286/129000  | Episode Reward: 1  | Average Reward 6.38  | Actor loss: -0.16 | Critic loss: 13.46 | Entropy loss: -0.0011  | Total Loss: 13.29 | Total Steps: 53
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 287/129000  | Episode Reward: 10  | Average Reward 6.41  | Actor loss: 0.02 | Critic loss: 1.49 | Entropy loss: -0.0000  | Total Loss: 1.51 | Total Steps: 6
---prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 288/129000  | Episode Reward: 10  | Average Reward 6.41  | Actor loss: 0.03 | Critic loss: 3.41 | Entropy loss: -0.0000  | Total Loss: 3.44 | Total Steps: 6
---sphere---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 289/129000  | Episode Reward: 7  | Average Reward 6.41  | Actor loss: 0.01 | Critic loss: 6

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 14/100  | Episode Reward: 1  | Average Reward 2.72  | Actor loss: 0.86 | Critic loss: 3.13 | Entropy loss: -0.0041  | Total Loss: 3.99 | Total Steps: 52
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 15/100  | Episode Reward: 1  | Average Reward 2.71  | Actor loss: 0.02 | Critic loss: 2.25 | Entropy loss: -0.0089  | Total Loss: 2.26 | Total Steps: 51
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 16/100  | Episode Reward: 10  | Average Reward 2.71  | Actor loss: 0.05 | Critic loss: 3.57 | Entropy loss: -0.0429  | Total Loss: 3.57 | Total Steps: 10
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST:

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 1  | Average Reward 2.60  | Actor loss: 0.05 | Critic loss: 2.04 | Entropy loss: -0.0017  | Total Loss: 2.08 | Total Steps: 53
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 42/100  | Episode Reward: 7  | Average Reward 2.68  | Actor loss: 0.05 | Critic loss: 2.46 | Entropy loss: -0.0031  | Total Loss: 2.51 | Total Steps: 30
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 10  | Average Reward 2.73  | Actor loss: 0.01 | Critic loss: 0.61 | Entropy loss: -0.0480  | Total Loss: 0.57 | Total Steps: 10
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | 

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 68/100  | Episode Reward: 4  | Average Reward 3.10  | Actor loss: 2.17 | Critic loss: 2.30 | Entropy loss: -0.0159  | Total Loss: 4.45 | Total Steps: 37
TEST: ---green---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 69/100  | Episode Reward: 10  | Average Reward 3.12  | Actor loss: 0.00 | Critic loss: 3.50 | Entropy loss: -0.0036  | Total Loss: 3.50 | Total Steps: 6
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 10  | Average Reward 3.12  | Actor loss: 0.05 | Critic loss: 4.61 | Entropy loss: -0.0400  | Total Loss: 4.62 | Total Steps: 69
TEST: ---yellow---
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 9  | Average Reward 3.16  | Actor loss: 0.02 | Critic loss: 

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 400
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 95/100  | Episode Reward: -49  | Average Reward 2.72  | Actor loss: -16.29 | Critic loss: 131.36 | Entropy loss: -0.0375  | Total Loss: 115.03 | Total Steps: 500
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 96/100  | Episode Reward: 10  | Average Reward 2.73  | Actor loss: 0.03 | Critic loss: 1.67 | Entropy loss: -0.0243  | Total Loss: 1.68 | Total Steps: 11
TEST: ---blue---
TEST: Dec

Decision Step reward: -3.0
Decision Step reward: -1.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 324/129000  | Episode Reward: 3  | Average Reward 6.39  | Actor loss: -1.02 | Critic loss: 10.19 | Entropy loss: -0.0052  | Total Loss: 9.17 | Total Steps: 54
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 325/129000  | Episode Reward: 10  | Average Reward 6.39  | Actor loss: -0.01 | Critic loss: 3.19 | Entropy loss: -0.0019  | Total Loss: 3.18 | Total Steps: 30
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 326/129000  | Episode Reward: 10  | Average Reward 6.39  | Actor loss: 0.01 | Critic loss: 1.36 | Entropy loss: -0.0000  | Total Loss: 1.37 | Total Steps: 6
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 327/129000  | Episode Reward: 10  | Average Reward 6.44  | Actor loss: 0.01 | Critic loss: 1.38 | Entropy loss: -0.0000  | Total Loss: 1.38 | Total

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 353/129000  | Episode Reward: 7  | Average Reward 6.47  | Actor loss: 0.47 | Critic loss: 4.16 | Entropy loss: -0.0022  | Total Loss: 4.62 | Total Steps: 31
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 354/129000  | Episode Reward: 7  | Average Reward 6.47  | Actor loss: 0.04 | Critic loss: 8.13 | Entropy loss: -0.0002  | Total Loss: 8.17 | Total Steps: 30
---green---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 355/129000  | Episode Reward: 1  | Average Reward 6.44  | Actor loss: -0.65 | Critic loss: 8.07 | Entropy loss: -0.0027  | Total Loss: 7.42 | Total Steps: 57
---black---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 356/129000  | Episode Reward: 7  | Average Reward 6.44  | Actor loss: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 383/129000  | Episode Reward: 7  | Average Reward 6.33  | Actor loss: 0.12 | Critic loss: 7.90 | Entropy loss: -0.0005  | Total Loss: 8.01 | Total Steps: 29
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 384/129000  | Episode Reward: 10  | Average Reward 6.33  | Actor loss: 0.01 | Critic loss: 0.94 | Entropy loss: -0.0000  | Total Loss: 0.95 | Total Steps: 6
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 385/129000  | Episode Reward: 10  | Average Reward 6.33  | Actor loss: 0.00 | Critic loss: 0.88 | Entropy loss: -0.0000  | Total Loss: 0.88 | Total Steps: 6
---blue---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 386/129000  | Episode Reward: 10  | Average Reward 6.38  | Actor loss: 0.94 | Critic loss: 0.91 | Entropy loss: -0.0022  | Total Loss: 1.84 | Total Steps: 7
---red---
Agent in terminal steps
Terminal Step r

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 8/100  | Episode Reward: 4  | Average Reward 2.10  | Actor loss: 3.02 | Critic loss: 30.14 | Entropy loss: -0.0121  | Total Loss: 33.14 | Total Steps: 51
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 9/100  | Episode Reward: 4  | Average Reward 2.10  | Actor loss: 0.00 | Critic loss: 2.09 | Entropy loss: -0.0010  | Total Loss: 2.09 | Total Steps: 42
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 10/100  | Episode Reward: 7  | Average Reward 2.18  | Actor loss: 0.01 | Critic loss: 14.44 | Entropy loss: -0.0006  | Total Loss: 14.45 | Total Steps: 29
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TES

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 4  | Average Reward 1.98  | Actor loss: 0.59 | Critic loss: 19.86 | Entropy loss: -0.0034  | Total Loss: 20.44 | Total Steps: 47
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 30/100  | Episode Reward: 4  | Average Reward 2.02  | Actor loss: 9.08 | Critic loss: 5.53 | Entropy loss: -0.0086  | Total Loss: 14.61 | Total Steps: 43
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 31/100  | Episode Reward: 1  | Average Reward 1.98  | Actor loss: 0.84 | Critic loss: 15.77 | Entropy loss: -0.0414  | Total Loss: 16.57 | Total Steps: 43
TEST: ---cylinder---
TEST: Decision Step reward: -3.

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 52/100  | Episode Reward: 4  | Average Reward 1.27  | Actor loss: 0.00 | Critic loss: 11.06 | Entropy loss: -0.0076  | Total Loss: 11.06 | Total Steps: 47
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 53/100  | Episode Reward: 4  | Average Reward 1.24  | Actor loss: 0.00 | Critic loss: 6.82 | Entropy loss: -0.0033  | Total Loss: 6.81 | Total Steps: 42
TEST: ---capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 54/100  | Episode Reward: 10  | Average Reward 1.25  | Actor loss: 0.03 | Critic loss: 20.05 | Entropy loss: -0.0010  | Total Loss: 20.08 | Total Steps: 6
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Te

TEST: Decision Step reward: -3.0
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 71/100  | Episode Reward: -115  | Average Reward 0.72  | Actor loss: -0.01 | Critic loss: 139.31 | Entropy loss: -0.0211  | Total Loss: 139.28 | Total Steps: 500
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 4  | Average Reward 0.72  | Actor loss: 4.58 | Critic loss: 16.27 | Entropy loss: -0.0308  | Total Loss: 20.82 | Total Steps: 64
TEST: ---cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 73/100  | Episode Reward: 10  | Average Reward 0.77  | Actor loss: 0.00 | Critic loss: 2.23 | Entropy loss: -0.0055  | Total Loss: 2.22 | Total Steps: 6
TEST: ---prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 74/100  | Episode Reward: 10  | Average Reward 0.77  | Actor loss:

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 88/100  | Episode Reward: 4  | Average Reward -0.89  | Actor loss: 0.01 | Critic loss: 11.34 | Entropy loss: -0.0096  | Total Loss: 11.34 | Total Steps: 76
TEST: ---cube---
TEST: Step: 100
TEST: Step: 200
TEST: Step: 300
TEST: Step: 400
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 89/100  | Episode Reward: -10  | Average Reward -0.95  | Actor loss: -0.00 | Critic loss: 76.74 | Entropy loss: -0.0032  | Total Loss: 76.73 | Total Steps: 500
TEST: ---sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 90/100  | Episode Reward: 10  | Average Reward -0.93  | Actor loss: 1.61 | Critic loss: 12.92 | Entropy loss: -0.0129  | Total Loss: 14.52 | Total Steps: 6
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step 

Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 410/129000  | Episode Reward: 4  | Average Reward 6.42  | Actor loss: -0.25 | Critic loss: 8.15 | Entropy loss: -0.0012  | Total Loss: 7.91 | Total Steps: 42
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 411/129000  | Episode Reward: 10  | Average Reward 6.46  | Actor loss: 0.01 | Critic loss: 7.42 | Entropy loss: -0.0000  | Total Loss: 7.43 | Total Steps: 6
---capsule---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 412/129000  | Episode Reward: 7  | Average Reward 6.50  | Actor loss: 0.00 | Critic loss: 4.05 | Entropy loss: -0.0005  | Total Loss: 4.06 | Total Steps: 47
---prism---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 413/129000  | Episode Reward: 7  | Average Reward 6.51  | Actor loss: 0.22 | Critic loss: 5.43 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 439/129000  | Episode Reward: 7  | Average Reward 6.38  | Actor loss: 0.19 | Critic loss: 7.73 | Entropy loss: -0.0006  | Total Loss: 7.92 | Total Steps: 29
---prism---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 440/129000  | Episode Reward: -2  | Average Reward 6.34  | Actor loss: -1.08 | Critic loss: 12.70 | Entropy loss: -0.0060  | Total Loss: 11.61 | Total Steps: 58
---cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 441/129000  | Episode Reward: 10  | Average Reward 6.36  | Actor loss: 0.23 | Critic loss: 4.55 | Entropy loss: -0.0008  | Total Loss: 4.78 | Total Steps: 29
---capsule---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 442/129000  | Episode Reward: 7  | Average Reward 6.34  | Ac

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 466/129000  | Episode Reward: 4  | Average Reward 5.87  | Actor loss: -0.26 | Critic loss: 4.50 | Entropy loss: -0.0032  | Total Loss: 4.23 | Total Steps: 47
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 467/129000  | Episode Reward: 10  | Average Reward 5.87  | Actor loss: 0.03 | Critic loss: 13.44 | Entropy loss: -0.0000  | Total Loss: 13.47 | Total Steps: 6
---capsule---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Step: 250
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step r

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 494/129000  | Episode Reward: 4  | Average Reward 5.29  | Actor loss: -1.01 | Critic loss: 6.73 | Entropy loss: -0.0023  | Total Loss: 5.71 | Total Steps: 42
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 495/129000  | Episode Reward: 4  | Average Reward 5.25  | Actor loss: -0.69 | Critic loss: 4.52 | Entropy loss: -0.0066  | Total Loss: 3.82 | Total Steps: 55
---prism---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 496/129000  | Episode Reward: -2  | Average Reward 5.22  | Actor loss: -0.77 | Critic loss: 11.84 | Entropy loss: -0.0030  | Total Loss: 11.07 | Total Steps: 58
---cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 497/12

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 16/100  | Episode Reward: 1  | Average Reward -2.04  | Actor loss: 0.00 | Critic loss: 2.87 | Entropy loss: -0.0221  | Total Loss: 2.86 | Total Steps: 51
TEST: ---black---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 17/100  | Episode Reward: 10  | Average Reward -2.00  | Actor loss: 0.04 | Critic loss: 7.26 | Entropy loss: -0.0054  | Total Loss: 7.30 | Total Steps: 6
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 4  | Average Reward -1.97  | Actor loss: 0.00 | Critic loss: 2.60 | Entropy loss: -0.0151  | Total Loss: 2.59 | Total Steps: 45
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Ter

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 4  | Average Reward -1.96  | Actor loss: 0.94 | Critic loss: 5.61 | Entropy loss: -0.0014  | Total Loss: 6.55 | Total Steps: 47
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 44/100  | Episode Reward: 7  | Average Reward -1.96  | Actor loss: 0.00 | Critic loss: 6.54 | Entropy loss: -0.0006  | Total Loss: 6.54 | Total Steps: 34
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 45/100  | Episode Reward: 4  | Average Reward -1.92  | Actor loss: 0.02 | Critic loss: 2.30 | Entropy loss: -0.0042  | Total Loss: 2.31 | Total Steps: 49
TEST: ---red---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 46/100  | Episode Reward: 10  | Average Reward -1.92  |

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 400
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 70/100  | Episode Reward: -85  | Average Reward -2.27  | Actor loss: -1.09 | Critic loss: 117.40 | Entropy loss: -0.0345  | Total Loss: 116.27 | Total Steps: 500
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 10  | Average Reward -2.27  | Actor loss: 0.03 | Critic loss: 1.27 | En

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 93/100  | Episode Reward: -5  | Average Reward -2.54  | Actor loss: 10.14 | Critic loss: 8.04 | Entropy loss: -0.0369  | Total Loss: 18.14 | Total Steps: 109
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 94/100  | Episode Reward: 7  | Average Reward -2.53  | Actor loss: 0.01 | Critic loss: 3.16 | Entropy loss: -0.0014  | Total Loss: 3.17 | Total Steps: 38
TEST: ---green---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 10  | Average Reward -2.23  | Actor loss: 0.00 | Critic loss: 2.74 | Entropy loss: -0.0007  | Total Loss: 2.75 | Total Steps: 6
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision St

Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 520/129000  | Episode Reward: 4  | Average Reward 5.26  | Actor loss: -0.08 | Critic loss: 6.56 | Entropy loss: -0.0024  | Total Loss: 6.48 | Total Steps: 115
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 521/129000  | Episode Reward: 10  | Average Reward 5.26  | Actor loss: 0.04 | Critic loss: 4.23 | Entropy loss: -0.0002  | Total Loss: 4.27 | Total Steps: 30
---green---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 522/129000  | Episode Reward: 7  | Average Reward 5.26  | Actor loss: 0.00 | Critic loss: 2.96 | Entropy loss: -0.0002  | Total Loss: 2.97 | Total Steps: 42
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 523/129000  | Episode Reward: 7  | Average Reward 5.25  | Actor loss: -0.01 | Critic loss: 4.78 |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 549/129000  | Episode Reward: -1  | Average Reward 5.08  | Actor loss: -2.30 | Critic loss: 9.54 | Entropy loss: -0.0166  | Total Loss: 7.22 | Total Steps: 76
---blue---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 550/129000  | Episode Reward: 4  | Average Reward 5.06  | Actor loss: -0.01 | Critic loss: 9.13 | Entropy loss: -0.0022  | Total Loss: 9.12 | Total Steps: 53
---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 551/129000  | Episode Reward: 10  | Average Reward 5.06  | Actor loss: 0.01 | Critic loss: 3.35 | Entropy loss: -0.0000  | Total Loss: 3.36 | Total Steps: 6
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 552/129000  | Episode Reward: 7  | Average Reward 5.08  | Actor loss: 0.12 | Critic loss: 7.06 | Entropy loss: -0.0004  | T

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 580/129000  | Episode Reward: 10  | Average Reward 5.02  | Actor loss: 0.01 | Critic loss: 1.36 | Entropy loss: -0.0000  | Total Loss: 1.37 | Total Steps: 6
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 581/129000  | Episode Reward: 10  | Average Reward 5.02  | Actor loss: 0.01 | Critic loss: 0.89 | Entropy loss: -0.0000  | Total Loss: 0.90 | Total Steps: 6
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 582/129000  | Episode Reward: 10  | Average Reward 5.07  | Actor loss: 0.51 | Critic loss: 3.68 | Entropy loss: -0.0006  | Total Loss: 4.19 | Total Steps: 8
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 583/129000  | Episode Reward: 4  | Average Reward 5.05  | Actor loss: -0.04 | Critic loss: 5.78 | Entropy loss: -0.0003  | Total Loss: 5.74 | Total Steps: 

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 3/100  | Episode Reward: 7  | Average Reward -3.46  | Actor loss: 0.07 | Critic loss: 8.43 | Entropy loss: -0.0044  | Total Loss: 8.50 | Total Steps: 95
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 4/100  | Episode Reward: 1  | Average Reward -3.46  | Actor loss: 0.33 | Critic loss: 5.69 | Entropy loss: -0.0046  | Total Loss: 6.02 | Total Steps: 52
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 5/100  | Episode Reward: 1  | Average Reward -3.42  | Actor loss: 0.00 | Critic loss: 2.82 | Entropy loss: -0.0019  | Total Loss: 2.82 | Total Steps: 52
TEST: ---prism---
TE

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 26/100  | Episode Reward: 10  | Average Reward -2.57  | Actor loss: 0.19 | Critic loss: 11.60 | Entropy loss: -0.0015  | Total Loss: 11.79 | Total Steps: 31
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 27/100  | Episode Reward: 7  | Average Reward -2.58  | Actor loss: 1.69 | Critic loss: 15.31 | Entropy loss: -0.0037  | Total Loss: 17.00 | Total Steps: 31
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 28/100  | Episode Reward: 7  | Average Reward -2.57  | Actor loss: 2.00 | Critic loss: 13.45 | Entropy loss: -0.0067  | Total Loss: 15.44 | Total Steps: 40
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 7  | Average R

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 47/100  | Episode Reward: -15  | Average Reward -3.73  | Actor loss: 3.19 | Critic loss: 19.44 | Entropy loss: -0.0432  | Total Loss: 22.59 | Total Steps: 166
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 48/100  | Episode Reward: 7  | Average Reward -3.74  | Actor loss: 0.00 | Critic loss: 4.27 | Entropy loss: -0.0015  | Total Loss: 4.27 | Total Steps: 38
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: T

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 70/100  | Episode Reward: -109  | Average Reward -3.10  | Actor loss: -0.01 | Critic loss: 59.88 | Entropy loss: -0.0217  | Total Loss: 59.85 | Total Steps: 500
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 4  | Average Reward -2.50  | Actor loss: 0.00 | Critic loss: 1.13 | Entropy loss: -0.0017  | Total Loss: 1.13 | Total Steps: 42
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 1  | Average Reward -2.52  | Actor loss: 0.01 | Critic loss: 0.90

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 90/100  | Episode Reward: 10  | Average Reward -2.01  | Actor loss: 0.18 | Critic loss: 11.59 | Entropy loss: -0.0010  | Total Loss: 11.78 | Total Steps: 31
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
T

---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 613/129000  | Episode Reward: 10  | Average Reward 5.13  | Actor loss: 0.31 | Critic loss: 5.21 | Entropy loss: -0.0011  | Total Loss: 5.52 | Total Steps: 32
---prism---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -1.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 614/129000  | Episode Reward: -9  | Average Reward 5.05  | Actor loss: -0.97 | Critic loss: 20.98 | Entropy loss: -0.0106  | Total Loss: 19.99 | Total Steps: 142
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 615/129000  | Episode Reward: 10  | Average Reward 5.07  | Actor loss: 0.95 | Critic loss: 1.19 | Entropy loss: -0.0040  | Total Loss: 2.14 | Total Steps: 15
---sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epis

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 642/129000  | Episode Reward: 10  | Average Reward 5.13  | Actor loss: 0.20 | Critic loss: 5.51 | Entropy loss: -0.0007  | Total Loss: 5.71 | Total Steps: 30
---prism---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 643/129000  | Episode Reward: 7  | Average Reward 5.14  | Actor loss: 0.04 | Critic loss: 9.36 | Entropy loss: -0.0002  | Total Loss: 9.40 | Total Steps: 30
---cylinder---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 644/129000  | Episode Reward: 7  | Average Reward 5.16  | Actor loss: 0.12 | Critic loss: 8.87 | Entropy loss: -0.0005  | Total Loss: 8.98 | Total Steps: 32
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 645/129000  | Episode Reward: 10  | Average Reward 5.19  | Actor loss: 0.01 | Critic loss: 4.03 | Entropy loss: -0.0000  | Total Loss: 4.03 | Total 

Training  | Episode: 671/129000  | Episode Reward: 10  | Average Reward 5.80  | Actor loss: 0.01 | Critic loss: 2.83 | Entropy loss: -0.0000  | Total Loss: 2.84 | Total Steps: 6
---sphere---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 672/129000  | Episode Reward: 7  | Average Reward 5.79  | Actor loss: 0.07 | Critic loss: 5.23 | Entropy loss: -0.0019  | Total Loss: 5.29 | Total Steps: 43
---capsule---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 673/129000  | Episode Reward: 7  | Average Reward 5.80  | Actor loss: 0.18 | Critic loss: 7.55 | Entropy loss: -0.0007  | Total Loss: 7.73 | Total Steps: 31
---prism---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 674/129000  | Episode Reward: 4  | Average Reward 5.79  | Actor loss: -0.68 | Critic loss: 9.56 | Entropy loss: -0.0113  | Total Loss: 8.87 | Tot

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 701/129000  | Episode Reward: 4  | Average Reward 5.88  | Actor loss: -0.17 | Critic loss: 4.69 | Entropy loss: -0.0026  | Total Loss: 4.52 | Total Steps: 51
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 1/100  | Episode Reward: 7  | Average Reward -1.94  | Actor loss: 0.17 | Critic loss: 13.75 | Entropy loss: -0.0316  | Total Loss: 13.88 | Total Steps: 105
TEST: ---blue---
TEST: Step: 100
TEST: Step: 200
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 2/100  | Episode Reward: 7  | Average Reward -1.95  | Actor loss: 0.00 | Critic loss: 3.74 | Entropy loss: -0.0017  | Total Loss: 3.74 | Total Steps: 365
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Ste

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 27/100  | Episode Reward: 7  | Average Reward -1.06  | Actor loss: 0.00 | Critic loss: 5.48 | Entropy loss: -0.0004  | Total Loss: 5.48 | Total Steps: 38
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 28/100  | Episode Reward: 7  | Average Reward -1.05  | Actor loss: 0.05 | Critic loss: 2.67 | Entropy loss: -0.0192  | Total Loss: 2.70 | Total Steps: 50
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 29/100  | Episode Reward: 10  | Average Reward -1.01  | Actor loss: 0.24 | Critic loss: 14.55 | Entropy loss: -0.0057  | Total Loss: 14.78 | Total Steps: 6
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 30/100  | Episode Reward: 7  | Average Reward -1.01 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 54/100  | Episode Reward: 10  | Average Reward -1.25  | Actor loss: 0.04 | Critic loss: 2.61 | Entropy loss: -0.0268  | Total Loss: 2.62 | Total Steps: 30
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 55/100  | Episode Reward: 1  | Average Reward -1.24  | Actor loss: 0.01 | Critic loss: 1.30 | Entropy loss: -0.0073  | Total Loss: 1.29 | Total Steps: 54
TEST: ---red---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: 10  | Average Reward -1.24  | Actor loss: 0.00 | Critic loss: 1.80 | Entropy loss: -0.0016  | Total Loss: 1.80 | Total Steps: 6
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Term

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 78/100  | Episode Reward: 4  | Average Reward -1.11  | Actor loss: 0.00 | Critic loss: 3.28 | Entropy loss: -0.0158  | Total Loss: 3.27 | Total Steps: 39
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 79/100  | Episode Reward: 1  | Average Reward -1.11  | Actor loss: 0.01 | Critic loss: 1.40 | Entropy loss: -0.0020  | Total Loss: 1.41 | Total Steps: 50
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 4  | Average Reward -1.10  | Actor loss: 0.68 | Critic loss: 2.55 | Entropy loss: -0.0141  | Total Loss: 3.22 | Total Steps: 56
TEST: ---red---
TEST: Age

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 706/129000  | Episode Reward: 4  | Average Reward 5.91  | Actor loss: -0.02 | Critic loss: 4.36 | Entropy loss: -0.0003  | Total Loss: 4.35 | Total Steps: 53
---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 707/129000  | Episode Reward: 10  | Average Reward 5.91  | Actor loss: 0.07 | Critic loss: 2.03 | Entropy loss: -0.0001  | Total Loss: 2.10 | Total Steps: 6
---blue---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 708/129000  | Episode Reward: 1  | Average Reward 5.88  | Actor loss: 0.10 | Critic loss: 7.89 | Entropy loss: -0.0016  | Total Loss: 7.99 | Total Steps: 53
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 709/129000  | Episode Reward: 10  | Average Reward 5.89  | Actor loss: -0.00 | Critic loss: 1.82 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 736/129000  | Episode Reward: 10  | Average Reward 6.13  | Actor loss: 0.11 | Critic loss: 3.73 | Entropy loss: -0.0005  | Total Loss: 3.84 | Total Steps: 29
---black---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 737/129000  | Episode Reward: 1  | Average Reward 6.09  | Actor loss: -0.52 | Critic loss: 8.54 | Entropy loss: -0.0035  | Total Loss: 8.01 | Total Steps: 53
---red---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 738/129000  | Episode Reward: 7  | Average Reward 6.08  | Actor loss: 0.53 | Critic loss: 6.14 | Entropy loss: -0.0018  | Total Loss: 6.67 | Total Steps: 31
---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 739/129000  | Episode Reward: 10  | Average Reward 6.11  | Actor loss: 0.01 | Critic loss: 1.38 |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 766/129000  | Episode Reward: 7  | Average Reward 6.19  | Actor loss: 0.07 | Critic loss: 5.71 | Entropy loss: -0.0003  | Total Loss: 5.78 | Total Steps: 31
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 767/129000  | Episode Reward: 10  | Average Reward 6.19  | Actor loss: 0.01 | Critic loss: 6.42 | Entropy loss: -0.0000  | Total Loss: 6.43 | Total Steps: 6
---blue---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 768/129000  | Episode Reward: 4  | Average Reward 6.19  | Actor loss: -0.11 | Critic loss: 4.75 | Entropy loss: -0.0007  | Total Loss: 4.64 | Total Steps: 42
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 769/129000  | Episode Reward: 10  | Average Reward 6.22  | Actor loss: 0.04 | Critic loss: 1.59 | Entropy loss: -0.0000  | Total Loss: 1.63 | Total Steps

Training  | Episode: 797/129000  | Episode Reward: 4  | Average Reward 6.29  | Actor loss: -0.41 | Critic loss: 6.38 | Entropy loss: -0.0015  | Total Loss: 5.97 | Total Steps: 34
---blue---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 798/129000  | Episode Reward: 10  | Average Reward 6.30  | Actor loss: 0.52 | Critic loss: 0.70 | Entropy loss: -0.0016  | Total Loss: 1.23 | Total Steps: 7
---black---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 799/129000  | Episode Reward: 7  | Average Reward 6.30  | Actor loss: -0.29 | Critic loss: 3.88 | Entropy loss: -0.0023  | Total Loss: 3.59 | Total Steps: 47
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 800/129000  | Episode Reward: 7  | Average Reward 6.29  | Actor loss: 0.05 | Critic loss: 5.47 | Entropy loss: -0.0003  | Total Loss: 5.51 | Total Steps: 30
---cylinder---
Decision Step reward: -3.0
Age

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 16/100  | Episode Reward: -124  | Average Reward -0.79  | Actor loss: -0.03 | Critic loss: 61.81 | Entropy loss: -0.0175  | Total Loss: 61.76 | Total Steps: 500
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 17/100  | Episode Reward: 4  | Average Reward -0.80  | Actor loss: 0.03 | Critic loss: 5.38 | Entropy loss: -0.0020  | Total Loss: 5.41 | Total Steps: 42
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 7  | Average Reward -0.81  | Actor loss: 0.00 | Critic loss: 1.54 | Entropy loss: -0.0004  | To

TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 33/100  | Episode Reward: -57  | Average Reward -2.29  | Actor loss: 5.99 | Critic loss: 4.58 | Entropy loss: -0.0148  | Total Loss: 10.56 | Total Steps: 324
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0


TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 48/100  | Episode Reward: 1  | Average Reward -2.79  | Actor loss: 0.65 | Critic loss: 11.22 | Entropy loss: -0.0062  | Total Loss: 11.86 | Total Steps: 52
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 49/100  | Episode Reward: 7  | Average Reward -2.76  | Actor loss: 0.02 | Critic loss: 3.36 | Entropy loss: -0.0053  | Total Loss: 3.38 | Total Steps: 34
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 50/100  | Episode Reward: 7  | Average Reward -2.76  | Actor loss: 0.00 | Critic loss: 2.59 | Entropy loss: -0.0018  | Total Loss: 2.59 | Total Steps: 30
TEST: ---cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Test

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 400
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 68/100  | Episode Reward: -97  | Average Reward -4.16  | Actor loss: -2.15 | Critic loss: 87.40 | Entropy loss: -0.0269  | Total L

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 89/100  | Episode Reward: -2  | Average Reward -3.13  | Actor loss: 0.00 | Critic loss: 9.14 | Entropy loss: -0.0118  | Total Loss: 9.13 | Total Steps: 80
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 814/129000  | Episode Reward: 4  | Average Reward 6.33  | Actor loss: -0.23 | Critic loss: 5.79 | Entropy loss: -0.0021  | Total Loss: 5.55 | Total Steps: 46
---sphere---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 815/129000  | Episode Reward: 7  | Average Reward 6.31  | Actor loss: -0.71 | Critic loss: 9.15 | Entropy loss: -0.0028  | Total Loss: 8.44 | Total Steps: 33
---cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 816/129000  | Episode Reward: 10  | Average Reward 6.31  | Actor loss: 0.00 | Critic loss: 0.97 | Entropy loss: -0.0000  | Total Loss: 0.97 | Total Steps: 6
---sphere---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 817/129000  | Episode Reward: 7  | Average Reward 6.29  | Actor loss: 0.07 | Critic loss: 5.78 | Entropy loss: -0.000

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 844/129000  | Episode Reward: 10  | Average Reward 6.46  | Actor loss: 0.14 | Critic loss: 3.16 | Entropy loss: -0.0076  | Total Loss: 3.30 | Total Steps: 63
---capsule---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 845/129000  | Episode Reward: 7  | Average Reward 6.45  | Actor loss: 0.08 | Critic loss: 8.12 | Entropy loss: -0.0005  | Total Loss: 8.20 | Total Steps: 29
---capsule---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 846/129000  | Episode Reward: 4  | Average Reward 6.42  | Actor loss: -0.05 | Critic loss: 8.96 | Entropy loss: -0.0010  | Total Loss: 8.91 | Total Steps: 46
---cylinder---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 847/129000  | Episode Reward: 4  | Average Reward 6.43  | Act

Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 873/129000  | Episode Reward: 1  | Average Reward 6.41  | Actor loss: -0.33 | Critic loss: 12.48 | Entropy loss: -0.0020  | Total Loss: 12.15 | Total Steps: 84
---sphere---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 874/129000  | Episode Reward: 7  | Average Reward 6.42  | Actor loss: -0.43 | Critic loss: 3.04 | Entropy loss: -0.0072  | Total Loss: 2.60 | Total Steps: 62
---capsule---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 875/129000  | Episode Reward: 7  | Average Reward 6.42  | Actor loss: 0.15 | Critic loss: 6.19 | Entropy loss: -0.0027  | Total Loss: 6.34 | Total Steps: 53
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 876/129000  | Episode Reward: 10  | Average Reward 6.42  | Ac

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 2/100  | Episode Reward: 7  | Average Reward -2.78  | Actor loss: 0.01 | Critic loss: 3.46 | Entropy loss: -0.0006  | Total Loss: 3.47 | Total Steps: 38
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 3/100  | Episode Reward: 6  | Average Reward -2.75  | Actor loss: 0.01 | Critic loss: 3.24 | Entropy loss: -0.0375  | Total Loss: 3.21 | Total Steps: 67
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 4/100  | Episode Reward: 4  | Average Reward -2.73  | Actor loss: 0.00 | Critic loss: 1.82 | Entropy loss: -0.0103  | Total Loss: 1.81 | Total Steps: 42
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 26/100  | Episode Reward: 7  | Average Reward -3.23  | Actor loss: 0.01 | Critic loss: 16.69 | Entropy loss: -0.0013  | Total Loss: 16.70 | Total Steps: 29
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 27/100  | Episode Reward: 7  | Average Reward -3.23  | Actor loss: 0.01 | Critic loss: 2.80 | Entropy loss: -0.0227  | Total Loss: 2.79 | Total Steps: 67
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 28/100  | Episode Reward: 4  | Average Reward -3.25  | Actor loss: 0.00 | Critic loss: 4.10 | Entropy loss: -0.0016  | Total Loss: 4.10 | Total Steps: 42
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision St

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 51/100  | Episode Reward: 4  | Average Reward -3.42  | Actor loss: 0.00 | Critic loss: 4.19 | Entropy loss: -0.0070  | Total Loss: 4.19 | Total Steps: 47
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 52/100  | Episode Reward: 10  | Average Reward -3.42  | Actor loss: 0.03 | Critic loss: 2.13 | Entropy loss: -0.0490  | Total Loss: 2.12 | Total Steps: 12
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 53/100  | Episode Reward: -2  | Average Reward -3.48  | Actor loss: 0.01 | Critic loss: 2.50 | Entropy loss: -0.0204  | Total Loss: 2.49 | Total Steps: 56
TEST: ---black---
TEST: Decision Step reward: -3.0
T

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 77/100  | Episode Reward: 7  | Average Reward -3.00  | Actor loss: 0.02 | Critic loss: 2.33 | Entropy loss: -0.0028  | Total Loss: 2.35 | Total Steps: 30
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 78/100  | Episode Reward: 4  | Average Reward -3.00  | Actor loss: 0.23 | Critic loss: 2.93 | Entropy loss: -0.0064  | Total Loss: 3.15 | Total Steps: 51
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 79/100  | Episode Reward: 4  | Average Reward -2.98  | Actor loss: 1.06 | Critic loss: 13.66 | Entropy loss: -0.0420  | Total Loss: 14.69 | Total Steps: 117
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Agent in term

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 904/129000  | Episode Reward: 0  | Average Reward 6.29  | Actor loss: -2.25 | Critic loss: 9.92 | Entropy loss: -0.0089  | Total Loss: 7.65 | Total Steps: 58
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 905/129000  | Episode Reward: 10  | Average Reward 6.30  | Actor loss: 0.86 | Critic loss: 4.08 | Entropy loss: -0.0009  | Total Loss: 4.95 | Total Steps: 8
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 906/129000  | Episode Reward: 10  | Average Reward 6.33  | Actor loss: 0.01 | Critic loss: 3.12 | Entropy loss: -0.0000  | Total Loss: 3.13 | Total Steps: 6
---green---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 907/129000  | Episode Reward: 7  | Average Reward 6.32  | Actor loss: 0.35 | Critic loss: 7.64 | Entropy loss: -0.0020  | Total Loss: 7.99 | Total Steps: 31
---blue---
Decision S

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 934/129000  | Episode Reward: 7  | Average Reward 6.23  | Actor loss: 0.57 | Critic loss: 4.10 | Entropy loss: -0.0035  | Total Loss: 4.67 | Total Steps: 31
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 935/129000  | Episode Reward: 10  | Average Reward 6.23  | Actor loss: 0.01 | Critic loss: 0.95 | Entropy loss: -0.0000  | Total Loss: 0.96 | Total Steps: 6
---yellow---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 936/129000  | Episode Reward: 7  | Average Reward 6.21  | Actor loss: 0.12 | Critic loss: 7.96 | Entropy loss: -0.0010  | Total Loss: 8.08 | Total Steps: 30
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 937/129000  | Episode Reward: 4  | Average Reward 6.23  | Actor loss: 0.08 | Critic loss: 7.65 | Entropy loss: -0.0015  | To

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 964/129000  | Episode Reward: 4  | Average Reward 6.26  | Actor loss: -0.04 | Critic loss: 7.11 | Entropy loss: -0.0004  | Total Loss: 7.06 | Total Steps: 53
---red---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 965/129000  | Episode Reward: 7  | Average Reward 6.25  | Actor loss: 0.46 | Critic loss: 6.55 | Entropy loss: -0.0015  | Total Loss: 7.02 | Total Steps: 30
---yellow---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 966/129000  | Episode Reward: 4  | Average Reward 6.23  | Actor loss: -0.25 | Critic loss: 4.53 | Entropy loss: -0.0039  | Total Loss: 4.28 | Total Steps: 56
---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 967/129000  | Episode Reward: 10  | Average Reward 6.23  | Actor loss: 0.03 | Critic loss: 3.16 

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 994/129000  | Episode Reward: 4  | Average Reward 6.07  | Actor loss: -0.04 | Critic loss: 7.22 | Entropy loss: -0.0005  | Total Loss: 7.19 | Total Steps: 42
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 995/129000  | Episode Reward: 7  | Average Reward 6.05  | Actor loss: -0.07 | Critic loss: 5.97 | Entropy loss: -0.0024  | Total Loss: 5.89 | Total Steps: 53
---green---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 996/129000  | Episode Reward: 7  | Average Reward 6.04  | Actor loss: 0.33 | Critic loss: 6.90 | Entropy loss: -0.0015  | Total Loss: 7.22 | Total Steps: 29
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 997/129000  | Episode Reward: 10  | Average Reward 6.07  | Actor loss: 0.24 | Critic loss: 5.56 | Entropy loss: -0.0003  | To

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 19/100  | Episode Reward: -2  | Average Reward -1.31  | Actor loss: 0.81 | Critic loss: 18.70 | Entropy loss: -0.0368  | Total Loss: 19.47 | Total Steps: 115
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 20/100  | Episode Reward: 7  | Average Reward -1.31  | Actor loss: 0.76 | Critic loss: 21.43 | Entropy loss: -0.0279  | Total Loss: 22.16 | Total Steps: 59
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 21/100  | Episode Reward: 7  | Average Reward -1.32  | Actor loss: 0.36 | Critic loss: 17.17 | Entropy loss: -0.0081  | Total Loss: 17.53 | Total Steps: 27
TEST: ---cylinder---
TEST: 

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 1  | Average Reward 0.39  | Actor loss: 0.03 | Critic loss: 1.17 | Entropy loss: -0.0371  | Total Loss: 1.16 | Total Steps: 85
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 44/100  | Episode Reward: 7  | Average Reward 0.40  | Actor loss: 0.72 | Critic loss: 15.63 | Entropy loss: -0.0368  | Total Loss: 16.31 | Total Steps: 28
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 45/100  | Episode Reward: 7  | Average Reward 0.43  | Actor loss: 1.70 | Critic loss: 25.68 | Entropy loss: -0.0187  | Total Loss: 27.36 | Total Steps: 33
TEST: ---capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0


TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 0  | Average Reward 2.10  | Actor loss: 0.64 | Critic loss: 19.26 | Entropy loss: -0.0221  | Total Loss: 19.88 | Total Steps: 104
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: -2  | Average Reward 2.07  | Actor loss: 0.01 | Critic loss: 0.60 | Entropy loss: -0.0242  | Total Loss: 0.59 | Total Steps: 55
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 1  | Average Reward 2.04  | Act

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 1  | Average Reward 3.33  | Actor loss: 0.10 | Critic loss: 8.04 | Entropy loss: -0.0060  | Total Loss: 8.14 | Total Steps: 52
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST:

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1022/129000  | Episode Reward: 7  | Average Reward 5.80  | Actor loss: -0.21 | Critic loss: 9.60 | Entropy loss: -0.0075  | Total Loss: 9.38 | Total Steps: 55
---cylinder---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1023/129000  | Episode Reward: 7  | Average Reward 5.82  | Actor loss: -0.02 | Critic loss: 3.88 | Entropy loss: -0.0003  | Total Loss: 3.86 | Total Steps: 42
---cube---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1024/129000  | Episode Reward: 7  | Average Reward 5.82  | Actor loss: 0.21 | Critic loss: 6.98 | Entropy loss: -0.0008  | Total Loss: 7.18 | Total Steps: 29
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1025/129000  | Episode Reward: 4  | Average Reward 5.79  | Act

Training  | Episode: 1052/129000  | Episode Reward: 10  | Average Reward 5.95  | Actor loss: 0.02 | Critic loss: 1.62 | Entropy loss: -0.0000  | Total Loss: 1.64 | Total Steps: 6
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1053/129000  | Episode Reward: 10  | Average Reward 5.96  | Actor loss: 0.02 | Critic loss: 1.90 | Entropy loss: -0.0000  | Total Loss: 1.91 | Total Steps: 6
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1054/129000  | Episode Reward: 10  | Average Reward 5.96  | Actor loss: 0.01 | Critic loss: 1.30 | Entropy loss: -0.0000  | Total Loss: 1.31 | Total Steps: 6
---cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1055/129000  | Episode Reward: 10  | Average Reward 6.00  | Actor loss: 0.40 | Critic loss: 5.45 | Entropy loss: -0.0027  | Total Loss: 5.85 | Total Steps: 10
---prism---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0


Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1083/129000  | Episode Reward: 4  | Average Reward 6.18  | Actor loss: -0.06 | Critic loss: 6.84 | Entropy loss: -0.0015  | Total Loss: 6.77 | Total Steps: 45
---cube---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1084/129000  | Episode Reward: 1  | Average Reward 6.16  | Actor loss: -0.16 | Critic loss: 12.92 | Entropy loss: -0.0006  | Total Loss: 12.76 | Total Steps: 52
---capsule---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1085/129000  | Episode Reward: 7  | Average Reward 6.14  | Actor loss: 0.04 | Critic loss: 3.55 | Entropy loss: -0.0014  | Total Loss: 3.59 | Total Steps: 47
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1086/12

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 11/100  | Episode Reward: 4  | Average Reward 2.88  | Actor loss: 0.00 | Critic loss: 4.14 | Entropy loss: -0.0010  | Total Loss: 4.14 | Total Steps: 42
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 12/100  | Episode Reward: 7  | Average Reward 2.88  | Actor loss: 0.04 | Critic loss: 2.77 | Entropy loss: -0.0032  | Total Loss: 2.81 | Total Steps: 31
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 13/100  | Episode Reward: 10  | Average Reward 2.92  | Actor loss: 0.10 | Critic loss: 10.28 | Entropy loss: -0.0058  | Total Loss: 10.37 | Total Steps: 6
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: T

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode Reward: 10  | Average Reward 4.11  | Actor loss: 0.30 | Critic loss: 4.85 | Entropy loss: -0.0173  | Total Loss: 5.13 | Total Steps: 14
TEST: ---blue---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 10  | Average Reward 4.13  | Actor loss: 0.02 | Critic loss: 29.16 | Entropy loss: -0.0349  | Total Loss: 29.14 | Total Steps: 8
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 42/100  | Episode Reward: 6  | Average Reward 4.13  | Actor loss: 0.02 | Critic loss: 0.63 | Entropy loss: -0.0350  | Total Loss: 0.61 | Total Steps: 67
TEST: ---blue---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: 10  | Average Reward 4.17  | Actor loss: 0.01 | Critic loss: 

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 69/100  | Episode Reward: 4  | Average Reward 4.27  | Actor loss: 0.00 | Critic loss: 4.66 | Entropy loss: -0.0032  | Total Loss: 4.65 | Total Steps: 43
TEST: ---yellow---
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 70/100  | Episode Reward: 9  | Average Reward 4.31  | Actor loss: 0.00 | Critic loss: 0.45 | Entropy loss: -0.0301  | Total Loss: 0.43 | Total Steps: 66
TEST: ---red---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 10  | Average Reward 4.36  | Actor loss: 0.00 | Critic loss: 3.83 | Entropy loss: -0.0437  | Total Loss: 3.79 | Total Steps: 9
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Ep

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 10  | Average Reward 4.49  | Actor loss: 0.00 | Critic loss: 3.53 | Entropy loss: -0.0015  | Total Loss: 3.53 | Total Steps: 31
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 2  | Average Reward 4.48  | Actor loss: 0.00 | Critic loss: 0.94 | Entropy loss: -0.0311  | Total Loss: 0.91 | Total Steps: 67
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 100/100  | Episode Reward: 1  | Average Reward 4.53  | Actor loss: 4.07 | Critic loss: 5.78 | Entropy loss: -0.0281  | Total Loss: 9.82 | Total Steps: 44
---black---
Agent in t

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1129/129000  | Episode Reward: 10  | Average Reward 6.41  | Actor loss: 0.03 | Critic loss: 1.90 | Entropy loss: -0.0000  | Total Loss: 1.93 | Total Steps: 6
---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1130/129000  | Episode Reward: 10  | Average Reward 6.42  | Actor loss: 0.01 | Critic loss: 2.59 | Entropy loss: -0.0000  | Total Loss: 2.59 | Total Steps: 6
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -1.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1131/129000  | Episode Reward: 3  | Average Reward 6.40  | Actor loss: -0.42 | Critic loss: 9.67 | Entropy loss: -0.0033  | Total Loss: 9.25 | Total Steps: 54
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1132/129000  | Episode Reward: 4  | Average Reward 6.40  | Actor loss:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1160/129000  | Episode Reward: 7  | Average Reward 6.48  | Actor loss: -0.03 | Critic loss: 5.31 | Entropy loss: -0.0017  | Total Loss: 5.28 | Total Steps: 44
---yellow---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -1.0
Decision Step reward: -1.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1161/129000  | Episode Reward: 2  | Average Reward 6.44  | Actor loss: -1.04 | Critic loss: 6.52 | Entropy loss: -0.0066  | Total Loss: 5.48 | Total Steps: 50
---blue---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1162/129000  | Episode Reward: 4  | Average Reward 6.44  | Actor loss: -0.16 | Critic loss: 6.45 | Entropy loss: -0.0011  | Total Loss: 6.29 | Total Steps: 54
---blue---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1163/129000  | Episode Reward: 10  | A

Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1190/129000  | Episode Reward: 1  | Average Reward 6.50  | Actor loss: -1.26 | Critic loss: 11.79 | Entropy loss: -0.0055  | Total Loss: 10.53 | Total Steps: 56
---black---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1191/129000  | Episode Reward: 4  | Average Reward 6.49  | Actor loss: -0.15 | Critic loss: 4.75 | Entropy loss: -0.0010  | Total Loss: 4.60 | Total Steps: 49
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1192/129000  | Episode Reward: 7  | Average Reward 6.49  | Actor loss: -0.20 | Critic loss: 2.67 | Entropy loss: -0.0023  | Total Loss: 2.47 | Total Steps: 43
---green---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Tra

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 17/100  | Episode Reward: 4  | Average Reward 4.86  | Actor loss: 0.09 | Critic loss: 8.07 | Entropy loss: -0.0076  | Total Loss: 8.15 | Total Steps: 43
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 7  | Average Reward 4.84  | Actor loss: 1.08 | Critic loss: 26.80 | Entropy loss: -0.0158  | Total Loss: 27.87 | Total Steps: 33
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 19/100  | Episode Reward: 7  | Average Reward 4.89  | Actor loss: 0.39 | Critic loss: 18.13 | Entropy loss: -0.0388  | Total Loss: 18.47 | Total Steps: 35
TEST: ---cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 20/100  | Episode Reward: 10  | Average Reward 4.

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 37/100  | Episode Reward: 4  | Average Reward 4.18  | Actor loss: 1.12 | Critic loss: 24.00 | Entropy loss: -0.0201  | Total Loss: 25.10 | Total Steps: 62
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 38/100  | Episode Reward: 1  | Average Reward 4.16  | Actor loss: 0.41 | Critic loss: 2.44 | Entropy loss: -0.0059  | Total Loss: 2.85 | Total Steps: 51
TEST: ---cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 39/100  | Episode Reward: 10  | Average Reward 4.20  | Actor loss: 0.00 | Critic loss: 1.88 | Entropy loss: -0.0019  | Total Loss: 1.88 | Total Steps: 6
TEST: ---capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Step: 400
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 57/100  | Episode Reward: -28  | Average Reward 3.02  | Actor loss: 1.39 | Critic loss: 21.72 | Entropy loss: -0.0370  | Total Loss: 23.08 | Total Steps: 493
TEST: ---cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 58/100  | Episode Reward: 10  | Average Reward 3.03  | Actor loss: 0.01 | Critic loss: 11.93 | Entropy loss: -0.0374  | Total Loss: 1

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 78/100  | Episode Reward: 10  | Average Reward 2.39  | Actor loss: 0.01 | Critic loss: 12.71 | Entropy loss: -0.0014  | Total Loss: 12.72 | Total Steps: 31
TEST: ---prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 79/100  | Episode Reward: 10  | Average Reward 2.39  | Actor loss: 0.82 | Critic loss: 16.67 | Entropy loss: -0.0049  | Total Loss: 17.48 | Total Steps: 6
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 7  | Average Reward 2.39  | Actor loss: 0.95 | Critic loss: 17.99 | Entropy loss: -0.0123  | Total Loss: 18.93 | Total Steps: 24
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision

Testing  | Episode: 100/100  | Episode Reward: 10  | Average Reward 2.25  | Actor loss: 0.01 | Critic loss: 0.59 | Entropy loss: -0.0382  | Total Loss: 0.56 | Total Steps: 11
---cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1202/129000  | Episode Reward: 10  | Average Reward 6.49  | Actor loss: 0.01 | Critic loss: 1.59 | Entropy loss: -0.0000  | Total Loss: 1.60 | Total Steps: 6
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1203/129000  | Episode Reward: 1  | Average Reward 6.46  | Actor loss: -1.72 | Critic loss: 14.64 | Entropy loss: -0.0113  | Total Loss: 12.90 | Total Steps: 94
---cube---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1204/129000  | Episode Reward: 4  | Average Reward 6.45  | Actor loss: 0.12 | Critic loss: 8.16 | Entropy loss: -0.0

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1229/129000  | Episode Reward: 7  | Average Reward 6.70  | Actor loss: -0.11 | Critic loss: 3.38 | Entropy loss: -0.0006  | Total Loss: 3.27 | Total Steps: 42
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1230/129000  | Episode Reward: 10  | Average Reward 6.70  | Actor loss: 0.03 | Critic loss: 13.19 | Entropy loss: -0.0000  | Total Loss: 13.23 | Total Steps: 6
---cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 6.71. Model has been saved-----
Training  | Episode: 1231/129000  | Episode Reward: 10  | Average Reward 6.71  | Actor loss: 0.01 | Critic loss: 1.38 | Entropy loss: -0.0000  | Total Loss: 1.39 | Total Steps: 6
---cube---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1232/129000  | Episode Reward: 7  | Average Rew

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1259/129000  | Episode Reward: 7  | Average Reward 6.50  | Actor loss: -0.38 | Critic loss: 9.64 | Entropy loss: -0.0022  | Total Loss: 9.26 | Total Steps: 31
---prism---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1260/129000  | Episode Reward: 4  | Average Reward 6.50  | Actor loss: -0.47 | Critic loss: 9.32 | Entropy loss: -0.0040  | Total Loss: 8.84 | Total Steps: 54
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1261/129000  | Episode Reward: 10  | Average Reward 6.53  | Actor loss: 0.00 | Critic loss: 1.36 | Entropy loss: -0.0000  | Total Loss: 1.37 | Total Steps: 6
---cylinder---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1262/129000  | Episode Reward: 4  | Average Reward 6.51  | A

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1289/129000  | Episode Reward: 7  | Average Reward 6.50  | Actor loss: 0.11 | Critic loss: 7.65 | Entropy loss: -0.0005  | Total Loss: 7.76 | Total Steps: 29
---prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1290/129000  | Episode Reward: 10  | Average Reward 6.53  | Actor loss: 1.43 | Critic loss: 4.25 | Entropy loss: -0.0023  | Total Loss: 5.67 | Total Steps: 10
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1291/129000  | Episode Reward: 10  | Average Reward 6.58  | Actor loss: 0.03 | Critic loss: 1.61 | Entropy loss: -0.0000  | Total Loss: 1.64 | Total Steps: 6
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1292/129000  | Episode Reward: -2  | Average Reward 6.54  | Actor loss: -1.90 | Critic loss: 

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 16/100  | Episode Reward: 4  | Average Reward 2.21  | Actor loss: 0.01 | Critic loss: 1.18 | Entropy loss: -0.0029  | Total Loss: 1.18 | Total Steps: 42
TEST: ---black---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 17/100  | Episode Reward: 10  | Average Reward 2.23  | Actor loss: 0.02 | Critic loss: 5.74 | Entropy loss: -0.0013  | Total Loss: 5.76 | Total Steps: 6
TEST: ---blue---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 10  | Average Reward 2.24  | Actor loss: 0.01 | Critic loss: 1.25 | Entropy loss: -0.0012  | Total Loss: 1.26 | Total Steps: 6
TEST: ---green---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 19/100  | Episode Reward: 10  | Average Reward 2.25  | Actor loss: 0.01 | Critic loss: 2.2

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode Reward: 4  | Average Reward 1.36  | Actor loss: 0.03 | Critic loss: 3.71 | Entropy loss: -0.0005  | Total Loss: 3.74 | Total Steps: 46
TEST: ---green---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 10  | Average Reward 1.36  | Actor loss: 0.00 | Critic loss: 2.45 | Entropy loss: -0.0020  | Total Loss: 2.45 | Total Steps: 6
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 42/100  | Episode Reward: 1  | Average Reward 1.33  | Actor loss: 0.02 | Critic loss: 3.30 | Entropy loss: -0.0021  | Total Loss: 3.31 | Total Steps: 50
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: 

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 66/100  | Episode Reward: 7  | Average Reward 1.00  | Actor loss: 0.01 | Critic loss: 1.44 | Entropy loss: -0.0014  | Total Loss: 1.44 | Total Steps: 38
TEST: ---black---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 67/100  | Episode Reward: 10  | Average Reward 1.00  | Actor loss: 0.02 | Critic loss: 15.34 | Entropy loss: -0.0010  | Total Loss: 15.36 | Total Steps: 6
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 68/100  | Episode Reward: 7  | Average Reward 0.99  | Actor loss: 0.04 | Critic loss: 5.36 | Entropy loss: -0.0142  | Total Loss: 5.39 | Total Steps: 52
TEST: ---blue---
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 69/100  

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 93/100  | Episode Reward: 1  | Average Reward 0.78  | Actor loss: 0.01 | Critic loss: 14.41 | Entropy loss: -0.0103  | Total Loss: 14.41 | Total Steps: 66
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 94/100  | Episode Reward: 10  | Average Reward 0.81  | Actor loss: 0.05 | Critic loss: 1.79 | Entropy loss: -0.0089  | Total Loss: 1.83 | Total Steps: 8
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 7  | Average Reward 0.79  | Actor loss: 0.01 | Critic loss: 1.38 | Entropy loss: -0.0026  | Total Loss: 1.39 | Total Steps: 36
TEST: ---black---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 96/100  | Episode Reward: 10  | Average Reward 0.81  |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1325/129000  | Episode Reward: 7  | Average Reward 6.58  | Actor loss: 0.04 | Critic loss: 6.24 | Entropy loss: -0.0002  | Total Loss: 6.28 | Total Steps: 30
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1326/129000  | Episode Reward: 10  | Average Reward 6.59  | Actor loss: 0.01 | Critic loss: 0.62 | Entropy loss: -0.0000  | Total Loss: 0.62 | Total Steps: 6
---black---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1327/129000  | Episode Reward: 7  | Average Reward 6.59  | Actor loss: -0.16 | Critic loss: 4.08 | Entropy loss: -0.0021  | Total Loss: 3.91 | Total Steps: 109
---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1328/129000  | Episode Reward: 10  | Average Reward 6.59  | Actor loss: 0.01 | Critic loss: 0.77 | Entropy loss: -0.0000  | Total Loss: 0.78 | Total Steps: 6
---yellow---
Deci

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1357/129000  | Episode Reward: 10  | Average Reward 6.72  | Actor loss: -0.21 | Critic loss: 2.67 | Entropy loss: -0.0033  | Total Loss: 2.46 | Total Steps: 36
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1358/129000  | Episode Reward: 10  | Average Reward 6.72  | Actor loss: 0.01 | Critic loss: 0.73 | Entropy loss: -0.0000  | Total Loss: 0.73 | Total Steps: 6
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 6.765. Model has been saved-----
Training  | Episode: 1359/129000  | Episode Reward: 10  | Average Reward 6.76  | Actor loss: -0.49 | Critic loss: 3.02 | Entropy loss: -0.0053  | Total Loss: 2.53 | Total Steps: 52
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1360/129000  | Episode

Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 6.985. Model has been saved-----
Training  | Episode: 1386/129000  | Episode Reward: 10  | Average Reward 6.99  | Actor loss: 0.01 | Critic loss: 0.13 | Entropy loss: -0.0000  | Total Loss: 0.13 | Total Steps: 6
---blue---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1387/129000  | Episode Reward: 10  | Average Reward 6.99  | Actor loss: 0.01 | Critic loss: 0.31 | Entropy loss: -0.0000  | Total Loss: 0.32 | Total Steps: 6
---green---
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 7.03. Model has been saved-----
Training  | Episode: 1388/129000  | Episode Reward: 10  | Average Reward 7.03  | Actor loss: -0.32 | Critic loss: 2.43 | Entropy loss: -0.0063  | Total Loss: 2.10 | Total Steps: 64
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
T

TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 13/100  | Episode Reward: 4  | Average Reward 0.94  | Actor loss: 2.74 | Critic loss: 28.15 | Entropy loss: -0.0204  | Total Loss: 30.88 | Total Steps: 34
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 14/100  | Episode Reward: 4  | Average Reward 0.94  | Actor loss: 0.27 | Critic loss: 8.80 | Entropy loss: -0.0067  | Total Loss: 9.06 | Total Steps: 50
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 15/100  | Episode Reward: 1  | Average Reward 0.91  | Actor loss: 0.07 | Critic loss: 5.55 | Entropy loss: -0.0082  | Total Loss: 5.61 | Total Steps: 62
TEST:

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 400
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 38/100  | Episode Reward: -97  | Average Reward 1.50  | Actor loss: -19.71 | Critic loss: 80.62 | Entropy loss: -0.0229  | Total Loss: 60.88 | Total Steps: 500
TEST: ---cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 39/100  | Episode Reward: 10  | Average Reward 1.50  | Actor loss: 0.15 | Critic

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 56/100  | Episode Reward: 4  | Average Reward 1.32  | Actor loss: 19.70 | Critic loss: 18.25 | Entropy loss: -0.0401  | Total Loss: 37.91 | Total Steps: 116
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 57/100  | Episode Reward: -26  | Average Reward 1.33  | Actor loss: 0.12 | Critic loss: 7.68 | Entropy loss: -0.0066  | Total Loss: 7.79 | Total Steps: 184
TEST: ---

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 4  | Average Reward 1.51  | Actor loss: 0.00 | Critic loss: 1.99 | Entropy loss: -0.0020  | Total Loss: 1.99 | Total Steps: 49
TEST: ---cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 81/100  | Episode Reward: 10  | Average Reward 2.14  | Actor loss: 0.03 | Critic loss: 4.83 | Entropy loss: -0.0004  | Total Loss: 4.86 | Total Steps: 31
TEST: ---prism---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 82/100  | Episode Reward: 10  | Average Reward 2.14  | Actor loss: 1.48 | Critic loss: 5.36 | Entropy loss: -0.0527  | Total Loss: 6.79 | Total Steps: 11
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0


TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 7  | Average Reward 1.04  | Actor loss: 0.02 | Critic loss: 2.35 | Entropy loss: -0.0037  | Total Loss: 2.36 | Total Steps: 30
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 100/100  | Episode Reward: 1  | Average Reward 0.99  | Actor loss: 0.10 | Critic loss: 3.14 | Entropy loss: -0.0289  | Total Loss: 3.21 | Total Steps: 68
---cylinder---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1402/129000  | Episode Reward: 7  | Average Reward 7.01  | Actor loss: 0.00 | Critic loss: 5.41 | Entropy loss: -0.0008  | Total Loss: 5.41 | Total Steps: 30
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1403/129000  | Episode Reward: 10  | Average Reward 7.

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1428/129000  | Episode Reward: 1  | Average Reward 6.93  | Actor loss: -0.21 | Critic loss: 12.15 | Entropy loss: -0.0016  | Total Loss: 11.94 | Total Steps: 53
---cube---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1429/129000  | Episode Reward: -2  | Average Reward 6.88  | Actor loss: -0.46 | Critic loss: 16.73 | Entropy loss: -0.0039  | Total Loss: 16.27 | Total Steps: 73
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -1.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1430/129000  | Episode Reward: 3  | Average Reward 6.85  | Actor loss: 0.20 | Critic loss: 8.79 | Entropy loss: -0.0036  | Total Loss: 8.99 | Total Steps: 46
---cylinder---
Agent in terminal steps
Terminal Step reward: 10

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1457/129000  | Episode Reward: 7  | Average Reward 6.89  | Actor loss: 0.18 | Critic loss: 6.28 | Entropy loss: -0.0036  | Total Loss: 6.45 | Total Steps: 47
---cylinder---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1458/129000  | Episode Reward: 7  | Average Reward 6.88  | Actor loss: 0.07 | Critic loss: 4.80 | Entropy loss: -0.0003  | Total Loss: 4.87 | Total Steps: 30
---cube---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1459/129000  | Episode Reward: 1  | Average Reward 6.85  | Actor loss: -1.42 | Critic loss: 13.27 | Entropy loss: -0.0037  | Total Loss: 11.85 | Total Steps: 43
---prism---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1460/129000  | Episode Reward: 7  | Average Reward 6.87  | Act

Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1487/129000  | Episode Reward: 4  | Average Reward 6.93  | Actor loss: -0.29 | Critic loss: 5.92 | Entropy loss: -0.0085  | Total Loss: 5.62 | Total Steps: 56
---prism---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1488/129000  | Episode Reward: 7  | Average Reward 6.92  | Actor loss: -0.06 | Critic loss: 7.98 | Entropy loss: -0.0012  | Total Loss: 7.92 | Total Steps: 24
---cube---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1489/129000  | Episode Reward: 7  | Average Reward 6.92  | Actor loss: 0.12 | Critic loss: 5.50 | Entropy loss: -0.0009  | Total Loss: 5.62 | Total Steps: 32
---cylinder---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1490/129000  | Episode Reward: 4  | 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 15/100  | Episode Reward: 7  | Average Reward 1.02  | Actor loss: 0.01 | Critic loss: 1.79 | Entropy loss: -0.0088  | Total Loss: 1.79 | Total Steps: 30
TEST: ---red---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 16/100  | Episode Reward: 10  | Average Reward 1.05  | Actor loss: 0.00 | Critic loss: 1.12 | Entropy loss: -0.0025  | Total Loss: 1.13 | Total Steps: 6
TEST: ---green---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 17/100  | Episode Reward: 10  | Average Reward 1.05  | Actor loss: 0.01 | Critic loss: 2.48 | Entropy loss: -0.0075  | Total Loss: 2.48 | Total Steps: 6
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 4  | Average Reward 1.02  | Actor loss: 1.96 | Critic loss: 7.89 | 

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 43/100  | Episode Reward: -11  | Average Reward 1.89  | Actor loss: 0.61 | Critic loss: 14.62 | Entropy loss: -0.0336  | Total Loss: 15.20 | Total Steps: 247
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 44/100  | Episode Reward: 7  | Average Reward 1.87  | Actor loss: 7.14 | Critic loss: 8.85 | Entropy loss: -0.0311  | Total Loss: 15.96 | Total Steps: 99
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 45/100  | Episode Reward: 4  | Average Reward 1.84  | Actor loss: 0.03 | Cr

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 7  | Average Reward 2.00  | Actor loss: 0.21 | Critic loss: 4.32 | Entropy loss: -0.0036  | Total Loss: 4.53 | Total Steps: 31
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 0  | Average Reward 1.95  | Actor loss: 0.01 | Critic loss: 0.51 | Entropy loss: -0.0227  | Total Loss: 0.49 | Total Steps: 107
TEST: ---blue---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 73/100  | Episode Reward: 10  | Average Reward 1.98  | Actor loss: 0.02 | Critic loss: 3.88 | Entropy loss: -0.0206  | Total Loss: 3.88 | Total Steps: 32
TEST: ---black---
TEST: Agent in terminal steps
TEST: Terminal Step rewar

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 97/100  | Episode Reward: 10  | Average Reward 2.00  | Actor loss: 0.00 | Critic loss: 2.22 | Entropy loss: -0.0030  | Total Loss: 2.22 | Total Steps: 6
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 7  | Average Reward 2.00  | Actor loss: 0.21 | Critic loss: 4.37 | Entropy loss: -0.0036  | Total Loss: 4.58 | Total Steps: 31
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 1  | Average Reward 1.97  | Actor loss: 5.68 | Critic loss: 2.03 | Entropy loss: -0.0160  | Total Loss: 7.69 | Total Steps: 54
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Ter

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1529/129000  | Episode Reward: 10  | Average Reward 6.84  | Actor loss: 0.01 | Critic loss: 2.20 | Entropy loss: -0.0000  | Total Loss: 2.21 | Total Steps: 6
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1530/129000  | Episode Reward: 10  | Average Reward 6.88  | Actor loss: 1.27 | Critic loss: 5.03 | Entropy loss: -0.0022  | Total Loss: 6.30 | Total Steps: 10
---red---
Decision Step reward: -3.0
Decision Step reward: -1.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1531/129000  | Episode Reward: 3  | Average Reward 6.85  | Actor loss: -0.48 | Critic loss: 8.09 | Entropy loss: -0.0035  | Total Loss: 7.60 | Total Steps: 55
---blue---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1532/129000  | Episode Reward: 10  | Average Reward 6.85  | Actor loss: 0.02 | Critic loss: 1.41 | Entropy loss: -0.0000 

Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1561/129000  | Episode Reward: 4  | Average Reward 6.74  | Actor loss: -0.72 | Critic loss: 5.45 | Entropy loss: -0.0058  | Total Loss: 4.72 | Total Steps: 59
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1562/129000  | Episode Reward: 10  | Average Reward 6.74  | Actor loss: 0.48 | Critic loss: 2.43 | Entropy loss: -0.0015  | Total Loss: 2.90 | Total Steps: 13
---red---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1563/129000  | Episode Reward: 7  | Average Reward 6.72  | Actor loss: 0.08 | Critic loss: 7.39 | Entropy loss: -0.0006  | Total Loss: 7.47 | Total Steps: 31
---blue---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1564/129000  | Episode Reward: 4  | Average Reward 6.71  | Actor loss:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1591/129000  | Episode Reward: 4  | Average Reward 6.46  | Actor loss: -0.16 | Critic loss: 5.46 | Entropy loss: -0.0008  | Total Loss: 5.30 | Total Steps: 42
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1592/129000  | Episode Reward: 7  | Average Reward 6.49  | Actor loss: -0.20 | Critic loss: 2.19 | Entropy loss: -0.0011  | Total Loss: 1.99 | Total Steps: 43
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1593/129000  | Episode Reward: 10  | Average Reward 6.49  | Actor loss: 0.01 | Critic loss: 0.69 | Entropy loss: -0.0000  | Total Loss: 0.69 | Total Steps: 6
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1594/129000  | Episode Reward: 4  | Average Reward 6.46  | Actor loss: 0.09 | Critic loss: 6.37 | Entropy loss: -0.0014  

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 18/100  | Episode Reward: 1  | Average Reward 1.75  | Actor loss: 13.38 | Critic loss: 7.50 | Entropy loss: -0.0170  | Total Loss: 20.86 | Total Steps: 56
TEST: ---capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 19/100  | Episode Reward: 10  | Average Reward 1.78  | Actor loss: 0.02 | Critic loss: 19.78 | Entropy loss: -0.0032  | Total Loss: 19.80 | Total Steps: 6
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 20/100  | Episode Reward: 7  | Average Reward 1.77  | Actor loss: 1.65 | Critic loss: 29.03 | Entropy loss: -0.0244  | Total Loss: 30.66 | Total Steps: 30
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 45/100  | Episode Reward: -8  | Average Reward 3.43  | Actor loss: 0.05 | Critic loss: 23.59 | Entropy loss: -0.0222  | Total Loss: 23.62 | Total Steps: 108
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision St

TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 61/100  | Episode Reward: -106  | Average Reward 2.73  | Actor loss: -0.00 | Critic loss: 80.02 | Entropy loss: -0.0225  | Total Loss: 80.00 | Total Steps: 500
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 62/100  | Episode Reward: 7  | Average Reward 2.73  | Actor loss: 2.11 | Critic loss: 16.36 | Entropy loss: -0.0188  | Total Loss: 18.45 | Total Steps: 25
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 63/100  | Episode Reward: 4  | Average Reward 2.72  | Actor loss: 0.01 | Critic loss: 11.26 | Entropy loss: -0.0017  | Total Loss: 11.26 | Total Steps: 42
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 64/100  | Episode Reward:

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 82/100  | Episode Reward: 1  | Average Reward 1.64  | Actor loss: 0.00 | Critic loss: 1.55 | Entropy loss: -0.0332  | Total Loss: 1.52 | Total Steps: 55
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 83/100  | Episode Reward: -5  | Average Reward 1.67  | Actor loss: 0.91 | Critic loss: 21.53 | Entropy loss: -0.0288  | Total Loss: 22.42 | Total Steps: 79
TEST: ---capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 84/100  | Episode Reward: 10  | Average Reward 1.72  | Actor loss: 0.02 | Critic loss: 19.56 | Entropy loss: -0.0051  | Total Loss: 19.57 | Total Steps: 6
TEST: ---prism---

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1605/129000  | Episode Reward: 10  | Average Reward 6.42  | Actor loss: 1.24 | Critic loss: 5.30 | Entropy loss: -0.0024  | Total Loss: 6.54 | Total Steps: 11
---cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1606/129000  | Episode Reward: 10  | Average Reward 6.44  | Actor loss: 0.21 | Critic loss: 4.05 | Entropy loss: -0.0006  | Total Loss: 4.25 | Total Steps: 29
---cylinder---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1607/129000  | Episode Reward: 10  | Average Reward 6.47  | Actor loss: -1.25 | Critic loss: 6.54 | Entropy loss: -0.0094  | Total Loss: 5.28 | Total Steps: 40
---capsule---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1608/129000  | Episode Reward: 1  | Average Reward 6.44  | Actor loss: -0.28 | Critic loss: 9.40 | Entropy lo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1635/129000  | Episode Reward: 10  | Average Reward 6.59  | Actor loss: 1.83 | Critic loss: 6.83 | Entropy loss: -0.0010  | Total Loss: 8.66 | Total Steps: 7
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1636/129000  | Episode Reward: 10  | Average Reward 6.59  | Actor loss: 0.01 | Critic loss: 0.87 | Entropy loss: -0.0000  | Total Loss: 0.88 | Total Steps: 6
---prism---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1637/129000  | Episode Reward: 7  | Average Reward 6.59  | Actor loss: -0.04 | Critic loss: 5.26 | Entropy loss: -0.0003  | Total Loss: 5.22 | Total Steps: 43
---prism---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1638/129000  | Episode Reward: 1  | Average Reward 6.55  | Actor loss: -0.79 | Critic loss: 9.

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1665/129000  | Episode Reward: 7  | Average Reward 6.61  | Actor loss: 0.12 | Critic loss: 4.29 | Entropy loss: -0.0006  | Total Loss: 4.40 | Total Steps: 29
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1666/129000  | Episode Reward: 10  | Average Reward 6.62  | Actor loss: 0.25 | Critic loss: 15.74 | Entropy loss: -0.0001  | Total Loss: 15.99 | Total Steps: 6
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1667/129000  | Episode Reward: 4  | Average Reward 6.59  | Actor loss: -0.26 | Critic loss: 8.14 | Entropy loss: -0.0032  | Total Loss: 7.88 | Total Steps: 55
---cube---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1668/129000  | Episode Reward: 4  | Average Reward 6.57  | Actor loss: -0.04 | Critic los

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1695/129000  | Episode Reward: 7  | Average Reward 6.51  | Actor loss: 0.27 | Critic loss: 5.17 | Entropy loss: -0.0012  | Total Loss: 5.44 | Total Steps: 31
---cube---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1696/129000  | Episode Reward: 4  | Average Reward 6.49  | Actor loss: 0.03 | Critic loss: 7.91 | Entropy loss: -0.0019  | Total Loss: 7.93 | Total Steps: 44
---cylinder---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1697/129000  | Episode Reward: 7  | Average Reward 6.50  | Actor loss: -0.04 | Critic loss: 2.96 | Entropy loss: -0.0003  | Total Loss: 2.92 | Total Steps: 42
---prism---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1698/129000  | Episode Reward: 7  | Average Reward 6.50  | Actor loss: 0.18 | Critic loss: 

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 23/100  | Episode Reward: 7  | Average Reward 2.25  | Actor loss: 0.00 | Critic loss: 8.80 | Entropy loss: -0.0014  | Total Loss: 8.80 | Total Steps: 34
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 24/100  | Episode Reward: 7  | Average Reward 2.28  | Actor loss: 0.10 | Critic loss: 3.69 | Entropy loss: -0.0033  | Total Loss: 3.79 | Total Steps: 30
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 25/100  | Episode Reward: -2  | Average Reward 2.22  | Actor loss: 0.00 | Critic loss: 3.23 | Entropy loss: -0.0099  | Total Loss: 3.22 | Total Steps: 55
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Dec

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 48/100  | Episode Reward: -3  | Average Reward 1.93  | Actor loss: 0.02 | Critic loss: 4.01 | Entropy loss: -0.0380  | Total Loss: 3.99 | Total Steps: 63
TEST: ---blue---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 49/100  | Episode Reward: 10  | Average Reward 1.93  | Actor loss: 0.01 | Critic loss: 1.69 | Entropy loss: -0.0075  | Total Loss: 1.69 | Total Steps: 6
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 50/100  | Episode Reward: 7  | Average Reward 1.91  | Actor loss: 0.05 | Critic loss: 2.10 | Entropy loss: -0.0065  | Total Loss: 2.14 | Total Steps: 39
TEST: ---yellow---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 51/100  | Episode Rewar

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 74/100  | Episode Reward: 7  | Average Reward 1.65  | Actor loss: 0.01 | Critic loss: 4.59 | Entropy loss: -0.0022  | Total Loss: 4.60 | Total Steps: 30
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 75/100  | Episode Reward: 7  | Average Reward 1.63  | Actor loss: 0.01 | Critic loss: 16.00 | Entropy loss: -0.0011  | Total Loss: 16.01 | Total Steps: 29
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 76/100  | Episode Reward: 7  | Average Reward 1.65  | Actor loss: 0.00 | Critic loss: 4.36 | Entropy loss: -0.0053  | Total Loss: 4.36 | Total Steps: 36
TEST: ---green---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 77/100  | Episode Reward: 10  | Average Reward 1.65  | Actor loss: 0.01 | Critic loss: 

---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1703/129000  | Episode Reward: 10  | Average Reward 6.50  | Actor loss: 0.48 | Critic loss: 1.82 | Entropy loss: -0.0009  | Total Loss: 2.30 | Total Steps: 9
---green---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1704/129000  | Episode Reward: 7  | Average Reward 6.50  | Actor loss: 0.00 | Critic loss: 5.92 | Entropy loss: -0.0016  | Total Loss: 5.92 | Total Steps: 45
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1705/129000  | Episode Reward: 10  | Average Reward 6.52  | Actor loss: -0.10 | Critic loss: 3.59 | Entropy loss: -0.0054  | Total Loss: 3.48 | Total Steps: 45
---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1706/129000  | Episode Reward: 10  | Average Reward 6.55  | Actor loss: 0.74 | Critic loss: 4.08 | Entropy loss: -0.0031  | Total Loss: 4.83 | Total Steps: 15


Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1733/129000  | Episode Reward: 10  | Average Reward 6.34  | Actor loss: 0.35 | Critic loss: 1.67 | Entropy loss: -0.0006  | Total Loss: 2.02 | Total Steps: 8
---black---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1734/129000  | Episode Reward: 7  | Average Reward 6.34  | Actor loss: 0.04 | Critic loss: 4.22 | Entropy loss: -0.0003  | Total Loss: 4.25 | Total Steps: 30
---yellow---
Decision Step reward: -3.0
Decision Step reward: -1.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1735/129000  | Episode Reward: 6  | Average Reward 6.35  | Actor loss: -0.58 | Critic loss: 5.52 | Entropy loss: -0.0061  | Total Loss: 4.94 | Total Steps: 60
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1736/129000  | Episode Reward: 7  | Average Reward 6.38  | Actor loss: 0.04 | Critic loss: 3.

Training  | Episode: 1762/129000  | Episode Reward: 9  | Average Reward 6.15  | Actor loss: -0.35 | Critic loss: 2.58 | Entropy loss: -0.0061  | Total Loss: 2.22 | Total Steps: 61
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1763/129000  | Episode Reward: 7  | Average Reward 6.15  | Actor loss: 0.09 | Critic loss: 6.87 | Entropy loss: -0.0007  | Total Loss: 6.96 | Total Steps: 34
---black---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1764/129000  | Episode Reward: 4  | Average Reward 6.15  | Actor loss: -0.02 | Critic loss: 4.65 | Entropy loss: -0.0002  | Total Loss: 4.63 | Total Steps: 47
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1765/129000  | Episode Reward: 10  | Average Reward 6.15  | Actor loss: 0.19 | Critic loss: 3.00 | Entropy loss: -0.0002  | Total Loss: 3.19 | Total Steps: 8
---blue---
Deci

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1792/129000  | Episode Reward: 4  | Average Reward 6.12  | Actor loss: -1.21 | Critic loss: 10.83 | Entropy loss: -0.0052  | Total Loss: 9.61 | Total Steps: 42
---red---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1793/129000  | Episode Reward: 10  | Average Reward 6.12  | Actor loss: 0.90 | Critic loss: 2.95 | Entropy loss: -0.0015  | Total Loss: 3.85 | Total Steps: 9
---red---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1794/129000  | Episode Reward: 7  | Average Reward 6.14  | Actor loss: -0.57 | Critic loss: 7.86 | Entropy loss: -0.0043  | Total Loss: 7.29 | Total Steps: 47
---blue---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1795/129000  | Episode Reward: 1  | Average Reward 6.09  | Actor loss: -0.28 | Critic loss: 9.06

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: 

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 37/100  | Episode Reward: 4  | Average Reward -0.03  | Actor loss: 0.01 | Critic loss: 2.95 | Entropy loss: -0.0019  | Total Loss: 2.96 | Total Steps: 42
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 38/100  | Episode Reward: 1  | Average Reward -0.06  | Actor loss: 0.07 | Critic loss: 9.69 | Entropy loss: -0.0035  | Total Loss: 9.76 | Total Steps: 59
TEST: ---cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 39/100  | Episode Reward: 10  | Average Reward -0.03  | Actor loss: 0.13 | Critic loss: 5.45 | Entropy loss: -0.0035  | Total Loss: 5.58 | Total Steps: 6
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
T

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 62/100  | Episode Reward: 7  | Average Reward 1.44  | Actor loss: 0.01 | Critic loss: 6.41 | Entropy loss: -0.0029  | Total Loss: 6.41 | Total Steps: 30
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 63/100  | Episode Reward: 4  | Average Reward 1.44  | Actor loss: 0.02 | Critic loss: 1.33 | Entropy loss: -0.0017  | Total Loss: 1.35 | Total Steps: 46
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 64/100  | Episode Reward: 7  | Average Reward 1.44  | Actor loss: 0.01 | Critic loss: 15.29 | Entropy loss: -0.0004  | Total Loss: 15.30 | Total Steps: 38
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TES

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 79/100  | Episode Reward: -71  | Average Reward 0.74  | Actor loss: 10.61 | Critic loss: 7.01 | Entropy loss: -0.0169  | Total Loss: 17.60 | Total Steps: 376
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 80/100  | Episode Reward: 4  | Average Reward 0.83  | Actor loss: 0.00 | Critic loss: 8.89 | Entropy loss: -0.0032  | Total Loss: 8.89 | Total Steps: 44
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 81/100  | Episode Reward: 0  | Average Reward 0.78  | Actor loss: 0.05 | Critic loss: 10.77 | Entropy loss: -0.0167  | Total Loss: 10.80 | Total Steps: 77
TEST: ---sphere

TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 96/100  | Episode Reward: -10  | Average Reward 0.09  | Actor loss: -0.00 | Critic loss: 77.35 | Entropy loss: -0.0002  | Total Loss: 77.34 | Total Steps: 500
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 97/100  | Episode Reward: 7  | Average Reward 0.10  | Actor loss: 0.14 | Critic loss: 11.12 | Entropy loss: -0.0047  | Total Loss: 11.26 | Total Steps: 30
TEST: ---sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 98/100  | Episode Reward: 10  | Average Reward 0.10  | Actor loss: 7.46 | Critic loss: 20.46 | Entropy loss: -0.0229  | Total Loss: 27.90 | Total Steps: 8
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 99/100  | Episode Reward: 7  | Average Reward 0.12  | Actor loss: 1.40 | Critic loss: 28.80 | En

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1826/129000  | Episode Reward: 4  | Average Reward 6.11  | Actor loss: -0.06 | Critic loss: 4.69 | Entropy loss: -0.0004  | Total Loss: 4.63 | Total Steps: 43
---sphere---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1827/129000  | Episode Reward: 7  | Average Reward 6.14  | Actor loss: 0.32 | Critic loss: 3.65 | Entropy loss: -0.0037  | Total Loss: 3.97 | Total Steps: 47
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1828/129000  | Episode Reward: 10  | Average Reward 6.17  | Actor loss: 0.14 | Critic loss: 3.73 | Entropy loss: -0.0008  | Total Loss: 3.87 | Total Steps: 31
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1829/129000  | Episode Reward: 10  | Average Reward 6.17  | Actor loss: 0.08 | Critic loss: 3.95 | Entropy loss: -0.0006  | Total Loss: 4.03 | Tota

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1853/129000  | Episode Reward: 7  | Average Reward 5.75  | Actor loss: 0.19 | Critic loss: 7.82 | Entropy loss: -0.0020  | Total Loss: 8.01 | Total Steps: 42
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1854/129000  | Episode Reward: 1  | Average Reward 5.74  | Actor loss: -0.25 | Critic loss: 11.87 | Entropy loss: -0.0047  | Total Loss: 11.62 | Total Steps: 58
---prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1855/129000  | Episode Reward: 10  | Average Reward 5.75  | Actor loss: 0.01 | Critic loss: 6.78 | Entropy loss: -0.0000  | Total Loss: 6.79 | Total Steps: 6
---capsule---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1856/129000  | Episode Reward: 7  | Average Reward 5.74  | Actor loss: 0.09 | Critic los

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1883/129000  | Episode Reward: 7  | Average Reward 5.75  | Actor loss: 0.52 | Critic loss: 6.67 | Entropy loss: -0.0029  | Total Loss: 7.19 | Total Steps: 31
---cylinder---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1884/129000  | Episode Reward: 1  | Average Reward 5.71  | Actor loss: -0.03 | Critic loss: 8.85 | Entropy loss: -0.0004  | Total Loss: 8.82 | Total Steps: 53
---capsule---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1885/129000  | Episode Reward: 4  | Average Reward 5.76  | Actor loss: -0.01 | Critic loss: 5.66 | Entropy loss: -0.0004  | Total Loss: 5.65 | Total Steps: 53
---cylinder---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 11/100  | Episode Reward: 1  | Average Reward 0.17  | Actor loss: 0.04 | Critic loss: 6.55 | Entropy loss: -0.0363  | Total Loss: 6.55 | Total Steps: 106
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 12/100  | Episode Reward: 10  | Average Reward 0.17  | Actor loss: 0.26 | Critic loss: 8.43 | Entropy loss: -0.0116  | Total Loss: 8.68 | Total Steps: 6
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 13/100  | Episode Reward: 4  | Average Reward 0.14  | Actor loss: 0.00 | Critic loss: 3.16 | Entropy loss: -0.0033  | Total Loss: 3.16 | Total Steps: 42
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Agent in termina

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 39/100  | Episode Reward: -2  | Average Reward 0.04  | Actor loss: 0.17 | Critic loss: 10.73 | Entropy loss: -0.0329  | Total Loss: 10.87 | Total Steps: 177
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 40/100  | Episode Reward: 4  | Average Reward 0.06  | Actor loss: 0.02 | Critic loss: 2.46 | Entropy loss: -0.0024  | Total Loss: 2.48 | Total Steps: 49
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 41/100  | Episode Reward: 4  | Average Reward 0.07  | Actor l

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 66/100  | Episode Reward: 7  | Average Reward 0.58  | Actor loss: 0.00 | Critic loss: 2.78 | Entropy loss: -0.0200  | Total Loss: 2.76 | Total Steps: 36
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 67/100  | Episode Reward: -2  | Average Reward 0.53  | Actor loss: 0.00 | Critic loss: 7.65 | Entropy loss: -0.0051  | Total Loss: 7.65 | Total Steps: 85
TEST: ---blue---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 68/100  | Episode Reward: 10  | Average Reward 0.53  | Actor loss: 0.03 | Critic loss: 3.08 | Entropy loss: -0.0023  | Total Loss: 3.11 | Total Steps: 6
TEST: ---red---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 69/100  | Episode Reward:

TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 94/100  | Episode Reward: 6  | Average Reward 0.78  | Actor loss: 0.01 | Critic loss: 0.55 | Entropy loss: -0.0288  | Total Loss: 0.53 | Total Steps: 70
TEST: ---green---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 95/100  | Episode Reward: 1  | Average Reward 0.77  | Actor loss: 0.01 | Critic loss: 7.27 | Entropy loss: -0.0093  | Total Loss: 7.27 | Total Steps: 120
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0


---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1924/129000  | Episode Reward: 10  | Average Reward 5.96  | Actor loss: 0.01 | Critic loss: 1.02 | Entropy loss: -0.0000  | Total Loss: 1.03 | Total Steps: 6
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1925/129000  | Episode Reward: 7  | Average Reward 5.94  | Actor loss: 0.12 | Critic loss: 5.32 | Entropy loss: -0.0006  | Total Loss: 5.44 | Total Steps: 30
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1926/129000  | Episode Reward: 10  | Average Reward 5.96  | Actor loss: 0.01 | Critic loss: 0.29 | Entropy loss: -0.0000  | Total Loss: 0.30 | Total Steps: 6
---blue---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1927/129000  | Episode Reward: 4  | Average Reward 5.92  | Actor loss: -0.05 | Critic loss: 6.29 | Entropy los

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1954/129000  | Episode Reward: 7  | Average Reward 6.03  | Actor loss: 0.10 | Critic loss: 2.38 | Entropy loss: -0.0009  | Total Loss: 2.48 | Total Steps: 43
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1955/129000  | Episode Reward: 10  | Average Reward 6.03  | Actor loss: 0.17 | Critic loss: 1.55 | Entropy loss: -0.0003  | Total Loss: 1.71 | Total Steps: 8
---green---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1956/129000  | Episode Reward: 10  | Average Reward 6.04  | Actor loss: 0.34 | Critic loss: 2.53 | Entropy loss: -0.0005  | Total Loss: 2.88 | Total Steps: 8
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1957/129000  | Episode Reward: 4  | Average Reward 6.01  | Actor loss: -0.05 | Critic loss: 6.92 | Entropy loss: -0.0018  

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1985/129000  | Episode Reward: 4  | Average Reward 6.06  | Actor loss: -0.04 | Critic loss: 4.73 | Entropy loss: -0.0003  | Total Loss: 4.70 | Total Steps: 42
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1986/129000  | Episode Reward: 10  | Average Reward 6.08  | Actor loss: 0.05 | Critic loss: 0.75 | Entropy loss: -0.0001  | Total Loss: 0.80 | Total Steps: 6
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1987/129000  | Episode Reward: 4  | Average Reward 6.06  | Actor loss: -0.09 | Critic loss: 6.18 | Entropy loss: -0.0014  | Total Loss: 6.10 | Total Steps: 45
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1988/129000  | Episode Reward: 7  | Average Reward 6.04  | Actor loss: -0.50 | Critic loss: 3.

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Step: 200
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 400
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 12/100  | Episode Reward: -22  | Average Reward 1.90  | Actor loss: 1.05 | Critic loss: 4.69 | Entropy loss: -0.0314  | Total Loss: 5.71 | Total Steps: 469
TEST: ---capsule---
TEST: Step: 100
TEST: Step: 200
TEST: Step: 300
TEST: Step: 400
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 13/100  | Episode Reward: -10  | Average Reward 1.80  | Actor loss: -0.00 | Critic loss:

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 37/100  | Episode Reward: 7  | Average Reward 1.70  | Actor loss: 0.94 | Critic loss: 24.93 | Entropy loss: -0.0130  | Total Loss: 25.86 | Total Steps: 43
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 61/100  | Episode Reward: 10  | Average Reward 1.44  | Actor loss: 0.01 | Critic loss: 11.40 | Entropy loss: -0.0004  | Total Loss: 11.41 | Total Steps: 31
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 62/100  | Episode Reward: -13  | Average Reward 1.33  | Actor loss: 0.72 | Critic loss: 14.50 | Entropy loss: -0.0339  | Total Loss: 15.19 | Total Steps: 158
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testi

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 83/100  | Episode Reward: -2  | Average Reward 2.07  | Actor loss: 0.30 | Critic loss: 11.00 | Entropy loss: -0.0246  | Total Loss: 11.28 | Total Steps: 141
TEST: ---sphere---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -1.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 84/100  | Episode Reward: -6  | Average Reward 2.65  | Actor loss: 0.67 | Critic loss: 13.95 | Entropy loss: -0.0327  | Total Loss: 14.58 | Total Steps: 175
TEST: ---cube---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 85/100  | Episode Reward: 10  | Average

Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2008/129000  | Episode Reward: 4  | Average Reward 6.18  | Actor loss: -0.84 | Critic loss: 11.29 | Entropy loss: -0.0132  | Total Loss: 10.44 | Total Steps: 111
---sphere---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2009/129000  | Episode Reward: 10  | Average Reward 6.18  | Actor loss: 0.15 | Critic loss: 1.90 | Entropy loss: -0.0002  | Total Loss: 2.04 | Total Steps: 8
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2010/129000  | Episode Reward: 10  | Average Reward 6.18  | Actor loss: 0.07 | Critic loss: 13.44 | Entropy loss: -0.0000  | Total Loss: 13.52 | Total Steps: 6
---cube---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2011/129000  | Episode Reward: 7  | Average Reward 6.17  | Actor loss: 0.04 | Critic loss: 8.59 | Entropy loss: -

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2037/129000  | Episode Reward: 4  | Average Reward 6.45  | Actor loss: -0.01 | Critic loss: 5.82 | Entropy loss: -0.0049  | Total Loss: 5.80 | Total Steps: 53
---cylinder---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2038/129000  | Episode Reward: 7  | Average Reward 6.45  | Actor loss: 0.03 | Critic loss: 7.35 | Entropy loss: -0.0006  | Total Loss: 7.38 | Total Steps: 29
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2039/129000  | Episode Reward: 10  | Average Reward 6.46  | Actor loss: 0.06 | Critic loss: 13.27 | Entropy loss: -0.0000  | Total Loss: 13.32 | Total Steps: 6
---prism---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2040/129000  | Episode Reward: 10  | Average Reward 6.47  | Actor loss: -0.06 | Critic loss: 2.72 | Entropy loss: -0.0053  | Total Loss: 2.65 | Total Steps: 51
---sphe

Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2066/129000  | Episode Reward: -2  | Average Reward 6.46  | Actor loss: -1.04 | Critic loss: 8.58 | Entropy loss: -0.0078  | Total Loss: 7.53 | Total Steps: 72
---cube---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2067/129000  | Episode Reward: 7  | Average Reward 6.46  | Actor loss: 0.12 | Critic loss: 4.38 | Entropy loss: -0.0006  | Total Loss: 4.50 | Total Steps: 30
---cylinder---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2068/129000  | Episode Reward: 7  | Average Reward 6.46  | Actor loss: 0.29 | Critic loss: 6.24 | Entropy loss: -0.0026  | Total Loss: 6.53 | Total Steps: 39
---capsule---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2069/12

Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2094/129000  | Episode Reward: -14  | Average Reward 6.16  | Actor loss: -0.38 | Critic loss: 17.41 | Entropy loss: -0.0044  | Total Loss: 17.03 | Total Steps: 153
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2095/129000  | Episode Reward: 4  | Average Reward 6.13  | Actor loss: 0.27 | Critic loss: 8.23 | Entropy loss: -0.0062  | Total Loss: 8.49 | Total Steps: 67
---sphere---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2096/129000  | Episode Reward: 4  | Average Reward 6.14  | Actor loss: -1.34 | Critic loss: 5.26 | Entropy loss: -0.0083  | Total Loss: 3.9

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 21/100  | Episode Reward: 4  | Average Reward 2.82  | Actor loss: 3.12 | Critic loss: 20.10 | Entropy loss: -0.0185  | Total Loss: 23.21 | Total Steps: 40
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 22/100  | Episode Reward: 7  | Average Reward 2.81  | Actor loss: 0.01 | Critic loss: 15.60 | Entropy loss: -0.0021  | Total Loss: 15.61 | Total Steps: 29
TEST: ---blue---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 23/100  | Episode Reward: 10  | Average Reward 2.82  | Actor loss: 0.37 | Critic loss: 6.71 | Entropy loss: -0.0638  | Total Loss: 7.01 | Total Steps: 7
TEST: ---blue---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 24/100  | Episode Reward: 10  | Average Reward 2.85  |

TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 49/100  | Episode Reward: 7  | Average Reward 2.54  | Actor loss: 0.01 | Critic loss: 3.62 | Entropy loss: -0.0015  | Total Loss: 3.63 | Total Steps: 38
TEST: ---black---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 50/100  | Episode Reward: -2  | Average Reward 2.52  | Actor loss: 0.01 | Critic loss: 15.25 | Entropy loss: -0.0060  | Total Loss: 15.25 | Total Steps: 85
TEST: ---yellow---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 51/100  | Episode Reward: 10  | Average Reward 2.52  | Actor loss: 4.96 | Critic loss: 11.85 | Entropy loss: -0.0753  | Total Loss: 16.73 | Total Steps: 13
TEST: ---red---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TE

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 74/100  | Episode Reward: 1  | Average Reward 2.18  | Actor loss: 0.02 | Critic loss: 3.71 | Entropy loss: -0.0016  | Total Loss: 3.73 | Total Steps: 50
TEST: ---red---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 75/100  | Episode Reward: 10  | Average Reward 2.19  | Actor loss: 0.30 | Critic loss: 7.62 | Entropy loss: -0.0602  | Total Loss: 7.86 | Total Steps: 10
TEST: ---blue---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 76/100  | Episode Reward: 7  | Average Reward 2.18  | Actor loss: 2.42 | Critic loss: 15.68 | Entropy loss: -0.0055  | Total Loss: 18.09 | Total Steps: 31
TEST: ---blue---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 77/100  | Episode Reward: 10  | Average Reward 2.21  | Ac

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2104/129000  | Episode Reward: 10  | Average Reward 6.25  | Actor loss: 0.25 | Critic loss: 3.43 | Entropy loss: -0.0003  | Total Loss: 3.68 | Total Steps: 8
---red---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2105/129000  | Episode Reward: 4  | Average Reward 6.22  | Actor loss: -0.16 | Critic loss: 6.10 | Entropy loss: -0.0019  | Total Loss: 5.94 | Total Steps: 53
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2106/129000  | Episode Reward: 10  | Average Reward 6.29  | Actor loss: 0.98 | Critic loss: 3.63 | Entropy loss: -0.0021  | Total Loss: 4.61 | Total Steps: 11
---yellow---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2107/129000  | Episode Reward: 10  | Average Reward 6.29  | Actor loss: 0.33 | Critic loss: 3.31 | Entropy loss: -0.0004  | Total Loss: 3.64 | Tot

Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2135/129000  | Episode Reward: 4  | Average Reward 6.18  | Actor loss: -0.87 | Critic loss: 4.04 | Entropy loss: -0.0075  | Total Loss: 3.16 | Total Steps: 64
---black---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2136/129000  | Episode Reward: 4  | Average Reward 6.17  | Actor loss: -0.03 | Critic loss: 6.25 | Entropy loss: -0.0006  | Total Loss: 6.22 | Total Steps: 49
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2137/129000  | Episode Reward: 7  | Average Reward 6.20  | Actor loss: 0.05 | Critic loss: 4.81 | Entropy loss: -0.0004  | Total Loss: 4.86 | Total Steps: 34
---green---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2138/129000  | Episode Reward: 7  | Ave

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2167/129000  | Episode Reward: 10  | Average Reward 6.33  | Actor loss: 0.00 | Critic loss: 1.15 | Entropy loss: -0.0000  | Total Loss: 1.16 | Total Steps: 6
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2168/129000  | Episode Reward: 7  | Average Reward 6.32  | Actor loss: 0.03 | Critic loss: 6.67 | Entropy loss: -0.0013  | Total Loss: 6.70 | Total Steps: 54
---black---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2169/129000  | Episode Reward: 10  | Average Reward 6.35  | Actor loss: 0.00 | Critic loss: 0.52 | Entropy loss: -0.0000  | Total Loss: 0.53 | Total Steps: 6
---green---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2170/129000  | Episode Reward: 4  | Average Reward 6.32  | Actor loss: -0.06 | Critic loss: 9.33 | Entropy loss: -0.0004  

Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2198/129000  | Episode Reward: 7  | Average Reward 6.16  | Actor loss: 0.06 | Critic loss: 7.96 | Entropy loss: -0.0002  | Total Loss: 8.02 | Total Steps: 30
---green---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2199/129000  | Episode Reward: 4  | Average Reward 6.13  | Actor loss: -0.14 | Critic loss: 9.44 | Entropy loss: -0.0026  | Total Loss: 9.30 | Total Steps: 32
---blue---
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2200/129000  | Episode Reward: 7  | Average Reward 6.14  | Actor loss: 0.00 | Critic loss: 4.38 | Entropy loss: -0.0002  | Total Loss: 4.38 | Total Steps: 47
---capsule---
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
Decision Step reward: -3.0
De

Testing  | Episode: 20/100  | Episode Reward: 4  | Average Reward 2.59  | Actor loss: 0.39 | Critic loss: 13.72 | Entropy loss: -0.0079  | Total Loss: 14.10 | Total Steps: 39
TEST: ---capsule---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 21/100  | Episode Reward: 10  | Average Reward 2.59  | Actor loss: 0.05 | Critic loss: 17.37 | Entropy loss: -0.0031  | Total Loss: 17.42 | Total Steps: 91
TEST: ---cylinder---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 22/100  | Episode Reward: 4  | Average Reward 2.58  | Actor loss: 0.01 | Critic loss: 3.72 | Entropy loss: -0.0007  | Total Loss: 3.73 | Total Steps: 42
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 23/100  

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 200
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 300
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: 

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 400
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Max Step Reward: -10
TEST: Step: 500
Testing  | Episode: 55/100  | Episode Reward: -112  | Average Reward -0.03  | Actor loss: -11.68 | Critic loss: 82.85 | Entropy loss: -0.0239  | Total Loss: 71.14 | Total Steps: 500
TEST: ---prism---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Step: 100
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0

TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 71/100  | Episode Reward: 4  | Average Reward -1.07  | Actor loss: 0.21 | Critic loss: 14.36 | Entropy loss: -0.0017  | Total Loss: 14.56 | Total Steps: 47
TEST: ---sphere---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 72/100  | Episode Reward: 10  | Average Reward -1.03  | Actor loss: 0.08 | Critic loss: 5.82 | Entropy loss: -0.0013  | Total Loss: 5.90 | Total Steps: 6
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 73/100  | Episode Reward: 4  | Average Reward -1.04  | Actor loss: 0.01 | Critic loss: 10.29 | Entropy loss: -0.0006  | Total Loss: 10.30 | Total Steps: 47
TEST: ---cylinder---
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 74/100  | Epi

TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 89/100  | Episode Reward: 10  | Average Reward -2.17  | Actor loss: 0.22 | Critic loss: 19.60 | Entropy loss: -0.0073  | Total Loss: 19.81 | Total Steps: 29
TEST: ---capsule---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 90/100  | Episode Reward: 1  | Average Reward -2.15  | Actor loss: 0.24 | Critic loss: 15.94 | Entropy loss: -0.0025  | Total Loss: 16.17 | Total Steps: 53
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TEST: Terminal Step reward: 10.0
Testing  | Episode: 91/100  | Episode Reward: 4  | Average Reward -2.13  | Actor loss: 0.00 | Critic loss: 7.19 | Entropy loss: -0.0006  | Total Loss: 7.19 | Total Steps: 42
TEST: ---cube---
TEST: Decision Step reward: -3.0
TEST: Agent in terminal steps
TE

Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2220/129000  | Episode Reward: 4  | Average Reward 5.89  | Actor loss: -0.57 | Critic loss: 10.13 | Entropy loss: -0.0125  | Total Loss: 9.55 | Total Steps: 112
---cube---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2221/129000  | Episode Reward: 10  | Average Reward 5.90  | Actor loss: 0.02 | Critic loss: 1.42 | Entropy loss: -0.0001  | Total Loss: 1.44 | Total Steps: 6
---capsule---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2222/129000  | Episode Reward: 10  | Average Reward 5.90  | Actor loss: 0.28 | Critic loss: 16.54 | Entropy loss: -0.0001  | Total Loss: 16.81 | Total Steps: 6
---cube---
Decision Step reward: -3.0
Decision Step reward: -3.0
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2223/129000  | Episode Reward: 4  | Average Reward 5.92  | Actor loss: -0.02 | Critic los

In [8]:
data = {
            'all_average_reward': all_average_reward,
            'all_episode_reward': all_episode_reward,
            'all_actor_loss': all_actor_loss,
            'all_critic_loss': all_critic_loss,
            'all_entropy_loss': all_entropy_loss,
            'all_total_loss': all_total_loss,
            'all_steps': all_steps,
        } 
file_path = f'result/{ALG_NAME}_{ENV_ID}_train.txt'
with open(file_path, 'w') as file:
    json.dump(data, file)

test_data = {
            'all_average_reward': test_average_reward,
            'all_episode_reward': test_episode_reward,
            'all_actor_loss': test_actor_loss,
            'all_critic_loss': test_critic_loss,
            'all_entropy_loss': test_entropy_loss,
            'all_total_loss': test_total_loss,
            'all_steps': test_steps,
        } 
file_path = f'result/{ALG_NAME}_{ENV_ID}_test.txt'
with open(file_path, 'w') as file:
    json.dump(test_data, file)