In [1]:
import matplotlib.pyplot as plt      
from mpl_toolkits import mplot3d
import os
import sys
import glob
import time
import shutil
import gym
from gym import error, spaces, utils
import numpy as np
import math
from pathlib import Path

In [2]:
import gym_fish

In [3]:
from gym_fish.envs.lib import pyflare as fl

In [4]:
# Label of this run; it is embedded in the name of the output folder.
Experiment_name = "90 degree servo control EMPIRICAL mass10 2_30 WP 1.0 wr 0.0 wa0.0"
# Free-form remark that is written to note.txt inside the output folder.
Experiment_note = 'change rotation reward'

In [5]:
# Switch the working directory to the parent of the directory the kernel
# started in, so that the relative paths used later (e.g. ./py_data/...) resolve.
# The starting directory is remembered on first execution: re-running this cell
# lands in the same place instead of climbing one more level up each time.
if "_notebook_start_dir" not in globals():
    _notebook_start_dir = Path(os.getcwd()).resolve()
os.chdir(_notebook_start_dir.parent)
print(os.getcwd())

/home/liuwj/codes/pytorch_fish


### Import Settings

In [6]:
# Every location is a plain string ending in '/', so file names can simply be
# appended to it.
cwd = f"{os.getcwd()}/"
data_folder = f"{cwd}py_data/"
json_folder = f"{data_folder}jsons/"
path_folder = f"{json_folder}paths/"
scripts_folder = f"{cwd}scripts/"

In [7]:
# JSON inputs handed to the environment constructor further down.
fluid_json = f"{json_folder}fluid_param_0.5.json"
rigid_json = f"{json_folder}rigids_2_30.json"
# Unlike the two files above, the path file is given relative to the working
# directory and then turned into an absolute path.
path_json = str(Path("./py_data/jsons/paths/line.json").resolve())
# Echo the three locations, one per line.
print(fluid_json, rigid_json, path_json, sep="\n")

/home/liuwj/codes/pytorch_fish/py_data/jsons/fluid_param_0.5.json
/home/liuwj/codes/pytorch_fish/py_data/jsons/rigids_2_30.json
/home/liuwj/codes/pytorch_fish/py_data/jsons/paths/line.json


In [8]:
def plot_infos(infos,title,reward_fig_name):
    """Plot every quantity recorded in ``infos`` against the step index and save it.

    Args:
        infos: sequence of dicts, one per environment step; each key becomes
            one labelled curve and its values are plotted in recording order.
        title: title drawn on the figure.
        reward_fig_name: file name the figure is saved to.

    Returns:
        None. When ``infos`` is empty nothing is drawn and no file is written.
    """
    if not infos:
        return

    # Gather one series per key. Keys are collected from every entry, so a key
    # that is missing from the first dict no longer raises KeyError.
    series = {}
    for info in infos:
        for key, value in info.items():
            series.setdefault(key, []).append(value)

    fig, ax = plt.subplots()
    try:
        for key, values in series.items():
            ax.plot(np.arange(len(values)), values, label=key)
        ax.legend()
        ax.set_title(title)
        fig.savefig(reward_fig_name)
    finally:
        # This function is called once per evaluation; without closing, every
        # call would leave another figure alive in pyplot's registry.
        plt.close(fig)

### Output Settings

In [9]:
# Read the clock once: the date folder and the time-stamped run folder are both
# derived from the same instant, so they cannot disagree when the cell happens
# to run across midnight.
_run_started = time.localtime()
store_folder_name = (time.strftime('%Y-%m-%d/', _run_started)
                     + Experiment_name + '_'
                     + time.strftime('%Y-%m-%d %H:%M/', _run_started))
imgs_folder_name = 'imgs/'
rl_data_folder_name = 'rl_data/'
rl_data_all_name='collected_o_a.npz'
network_folder_name = 'networks/'
store_folder = cwd+'output_data/'+store_folder_name
imgs_folder = store_folder+imgs_folder_name
rl_data_folder= store_folder+rl_data_folder_name
network_folder = store_folder+network_folder_name

# Create the run folder and its sub-folders; exist_ok makes re-running the cell
# harmless and avoids the check-then-create race of os.path.exists().
for _folder in (store_folder, imgs_folder, rl_data_folder, network_folder):
    os.makedirs(_folder, exist_ok=True)

## Saving sources

In [10]:
# Snapshot the input data and scripts next to the results, and record the
# experiment note, so the run folder documents what produced it.
save_sources = True
sources_folder_name = ['py_data','scripts']
if save_sources:
    for fn in sources_folder_name:
        snapshot_dir = store_folder+'sources/'+fn
        # shutil.copytree raises FileExistsError when the destination already
        # exists, which happens if this cell is re-run while the run folder
        # (minute resolution) is unchanged; keep the existing snapshot instead.
        if not os.path.exists(snapshot_dir):
            shutil.copytree(cwd+fn, snapshot_dir)
# 'w' truncates like the former 'w+'; read access was never used.
with open(store_folder+'note.txt','w') as f:
    f.write(time.strftime('%Y-%m-%d %H:%M\n',time.localtime(time.time())))
    f.write(Experiment_note)

### RL_TRAINING Setup

In [11]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy
import math
import os
import sys
import time
import pickle as pkl

from video import VideoRecorder
from logger import Logger
from replay_buffer import ReplayBuffer
import utils
import hydra

## RL Begin

In [12]:
from gym_fish.envs import FishEnv

# Instantiate the registered 'fish-v0' environment with the JSON inputs chosen
# earlier in the notebook.
# NOTE(review): wp / wr / wa look like the reward weights named in
# Experiment_name ("WP 1.0 wr 0.0 wa0.0") and theta like the "90 degree"
# target - confirm against the FishEnv implementation.
env = gym.make(
    'fish-v0',
    gpuId=0,
    path_json=path_json,
    fluid_json=fluid_json,
    rigid_json=rigid_json,
    couple_mode=fl.COUPLE_MODE.EMPIRICAL,
    theta=90,
    frame_skip=100,
    wp=1.0,
    wr=0.0,
    wa=0.0,
)

0
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]


In [13]:
from agent.sac import SACAgent
from agent.critic import DoubleQCritic
from agent.actor import DiagGaussianActor

In [14]:
algo = 'SAC'
# (sic) the misspelled name is kept because other cells may refer to it.
algofoler=algo+'/'

# Per-algorithm locations below the run's network folder.
tb_folder =  network_folder+algofoler
model_folder = network_folder+algofoler+'models/'
result_img_folder = network_folder+algofoler+'imgs/'

# One idempotent loop replaces five copy-pasted "if not exists: makedirs"
# stanzas; makedirs also creates any missing parent directories.
for _folder in (tb_folder,
                result_img_folder,
                result_img_folder+'trajs/',
                result_img_folder+'rewards/',
                model_folder):
    os.makedirs(_folder, exist_ok=True)

In [15]:
# --- training schedule -------------------------------------------------------
num_train_steps = 1e6           # training stops once this many env steps were taken
replay_buffer_capacity = 5000   # cast to int when the replay buffer is built
num_seed_steps = 256            # steps with random actions before the agent acts/updates
eval_frequency = 1000           # minimum number of steps between two evaluations
num_eval_episodes = 1           # episodes averaged per evaluation
device = "cuda"
seed = 1

# --- logger --------------------------------------------------------------------
log_frequency = 10000
log_save_tb = True

# --- video recorder ------------------------------------------------------------
save_video = False

In [16]:
# ---- agent settings -------------------------------------------------------------
# Dimensions and the action range are read from the environment's spaces.
obs_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
action_range = [
    float(env.action_space.low.min()),
    float(env.action_space.high.max()),
]
discount = 0.99
init_temperature = 0.1
alpha_lr = 3e-4
alpha_betas = [0.9, 0.999]
actor_lr = 3e-4
actor_betas = [0.9, 0.999]
actor_update_frequency = 1
critic_lr = 3e-4
critic_betas = [0.9, 0.999]
critic_tau = 0.005
critic_target_update_frequency = 2
batch_size = 128
learnable_temperature = True

# ---- critic settings ------------------------------------------------------------
critic_hidden_dim = 128
critic_hidden_depth = 2

# ---- actor settings -------------------------------------------------------------
actor_hidden_depth = 2
actor_hidden_dim = 128
actor_log_std_bounds = [-5, 2]

# ---- networks and agent ---------------------------------------------------------
critic_network = DoubleQCritic(
    obs_dim,
    action_dim,
    critic_hidden_dim,
    critic_hidden_depth,
)

actor_network = DiagGaussianActor(
    obs_dim,
    action_dim,
    actor_hidden_dim,
    actor_hidden_depth,
    actor_log_std_bounds,
)

# All arguments are positional, in the same order as before.
agent = SACAgent(
    obs_dim,
    action_dim,
    action_range,
    device,
    critic_network,
    actor_network,
    discount,
    init_temperature,
    alpha_lr,
    alpha_betas,
    actor_lr,
    actor_betas,
    actor_update_frequency,
    critic_lr,
    critic_betas,
    critic_tau,
    critic_target_update_frequency,
    batch_size,
    learnable_temperature,
)

In [17]:
# evaluate_save_data: step the environment with stepSave(...) instead of step(...)
# during evaluation; evaluate_save_fluid is forwarded to it as save_fluid=...
evaluate_save_data, evaluate_save_fluid = True, False



In [18]:
class Workspace(object):
    """Training driver: collects experience, updates the agent and evaluates it.

    Besides the ``env`` argument, the class reads its configuration from
    notebook-level variables that must already be defined when it is used:
    ``agent``, ``num_eval_episodes``, ``num_train_steps``, ``num_seed_steps``,
    ``eval_frequency``, ``seed``, ``device``, ``replay_buffer_capacity``,
    ``tb_folder``, ``log_save_tb``, ``log_frequency``, ``evaluate_save_data``,
    ``evaluate_save_fluid``, ``model_folder`` and ``result_img_folder``.
    """

    def __init__(self,env):
        """Create logger and replay buffer, seed everything and reset the counters.

        Args:
            env: environment to train on. It must provide ``observation_space``,
                ``action_space``, ``reset`` and ``step``; ``evaluate`` also
                uses ``stepSave`` and ``plot3d``.
        """
        self.work_dir = os.getcwd()
        print(f'workspace: {self.work_dir}')

        self.num_eval_episodes= num_eval_episodes
        self.num_train_steps = num_train_steps
        self.num_seed_steps = num_seed_steps
        self.eval_frequency=eval_frequency

        self.logger = Logger(tb_folder,
                             save_tb=log_save_tb,
                             log_frequency=log_frequency,
                             agent="sac")

        utils.set_seed_everywhere(seed)
        self.device = torch.device(device)
        self.env = env
        self.evaluate_save_data =evaluate_save_data
        self.evaluate_save_fluid =evaluate_save_fluid
        # The agent is the notebook-level object, not a constructor argument.
        self.agent = agent

        self.replay_buffer = ReplayBuffer(self.env.observation_space.shape,
                                          self.env.action_space.shape,
                                          int(replay_buffer_capacity),
                                          self.device)

        # self.video_recorder = VideoRecorder(
        #     self.work_dir if save_video else None)
        self.step = 0
        # Step count at which the next evaluation becomes due.
        self.next_eval_steps= self.eval_frequency

    def evaluate(self):
        """Save the current model, run the evaluation episodes and log their mean reward.

        Actions are requested with ``sample=False``. For the best episode of
        this call, the per-step ``info`` curves and the environment's 3-D
        trajectory plot are written to the result image folders; the file
        names carry the current training step.
        """
        average_episode_reward = 0
        reward_fig_name =result_img_folder+'rewards/'+"steps_{0}.png".format(self.step)
        traj_fig_name =result_img_folder+'trajs/'+"steps_{0}.png".format(self.step)
        model_path = model_folder+"steps_{0}".format(self.step)
        # save model
        self.agent.save(model_path)
        # -inf instead of a magic -9999 so the first episode is always plotted,
        # however low its reward is.
        best_rewards = float('-inf')
        for episode in range(self.num_eval_episodes):
            print("Evaluating episode " + str(episode) +" ....")
            obs = self.env.reset()
            self.agent.reset()
            # self.video_recorder.init(enabled=(episode == 0))
            done = False
            episode_reward = 0
            infos = []
            while not done:
                with utils.eval_mode(self.agent):
                    action = self.agent.act(obs, sample=False)
                if self.evaluate_save_data:
                    obs, reward, done, info = self.env.stepSave(action,save_fluid=self.evaluate_save_fluid)
                else:
                    obs, reward, done, info = self.env.step(action)
                infos.append(info)
                # self.video_recorder.record(self.env)
                episode_reward += reward
            if episode_reward>best_rewards:
                best_rewards = episode_reward
                plot_infos(infos,title=str(self.step),reward_fig_name=reward_fig_name)
                # Plot the environment that was just stepped. The previous code
                # used the notebook-level `env`, which is only the same object
                # when the workspace happened to be built from it.
                self.env.plot3d(title=str(self.step),fig_name = traj_fig_name)
            average_episode_reward += episode_reward
            # self.video_recorder.save(f'{self.step}.mp4')
        average_episode_reward /= self.num_eval_episodes
        self.logger.log('eval/episode_reward', average_episode_reward,
                        self.step)
        self.logger.dump(self.step)

    def run(self):
        """Run the training loop until ``num_train_steps`` environment steps were taken.

        For the first ``num_seed_steps`` steps actions are sampled from the
        action space; afterwards the agent acts (``sample=True``) and is
        updated once per step. Evaluation is only triggered at episode
        boundaries, once ``next_eval_steps`` has been reached.
        """
        # done=True forces the reset branch on the first iteration, which is
        # where obs and episode_step get their initial values.
        episode, episode_reward, done = 0, 0, True
        start_time = time.time()
        while self.step < self.num_train_steps:
            if done:
                if self.step > 0:
                    self.logger.log('train/duration',
                                    time.time() - start_time, self.step)
                    start_time = time.time()
                    self.logger.dump(
                        self.step, save=(self.step > self.num_seed_steps))

                # evaluate agent periodically
                if self.step > 0 and self.step >=self.next_eval_steps:
                    self.logger.log('eval/episode', episode, self.step)
                    self.next_eval_steps = self.next_eval_steps+self.eval_frequency
                    self.evaluate()

                self.logger.log('train/episode_reward', episode_reward,
                                self.step)

                obs = self.env.reset()
                self.agent.reset()
                done = False
                episode_reward = 0
                episode_step = 0
                episode += 1

                self.logger.log('train/episode', episode, self.step)

            # sample action for data collection
            if self.step < self.num_seed_steps:
                action = self.env.action_space.sample()
            else:
                with utils.eval_mode(self.agent):
                    action = self.agent.act(obs, sample=True)

            # run training update
            if self.step >= self.num_seed_steps:
                self.agent.update(self.replay_buffer, self.logger, self.step)

            next_obs, reward, done, _ = self.env.step(action)

            # allow infinite bootstrap
            done = float(done)
            # NOTE(review): with the time-limit variant below disabled,
            # done_no_max is always equal to done - confirm this is intended if
            # the environment ends episodes on a step limit.
#             done_no_max = 0 if episode_step + 1 == self.env._max_episode_steps else done
            done_no_max = done
            episode_reward += reward

            self.replay_buffer.add(obs, action, reward, next_obs, done,
                                   done_no_max)

            obs = next_obs
            episode_step += 1
            self.step += 1


### RL Train

In [19]:
# Run the project's clean_visdata.sh shell script before training starts.
# NOTE(review): judging by the recorded output it removes files under
# ./data/vis_data/ - confirm against the script itself.
!./clean_visdata.sh

rm: 无法删除'./data/vis_data/Fluid/*': 没有那个文件或目录


In [20]:
# Build the training workspace around the environment; the constructor also
# seeds the RNGs and creates the logger and the replay buffer.
ws = Workspace(env)



workspace: /home/liuwj/codes/pytorch_fish


In [None]:

# Start training: loops until num_train_steps environment steps have been taken,
# evaluating the agent at episode boundaries once eval_frequency steps passed.
ws.run()

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 29 | S: 1479 | R: 0.2242 | D: 24.0 s | BR: 0.0132 | ALOSS: -1.6045 | CLOSS: 0.0669 | TLOSS: 0.1048 | TVAL: 0.0716 | AENT: 0.4649
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 30 | S: 1530 | R: 2.4570 | D: 24.0 s | BR: 0.0266 | ALOSS: -1.7430 | CLOSS: 0.0631 | TLOSS: 0.0992 | TVAL: 0.0706 | AENT: 0.4044
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 31 | S: 1581 | R: 4.4459 | D: 24.1 s | BR: 0.0259 | ALOSS: -1.6645 | CLOSS: 0.0653 | TLOSS: 0.1002 | TVAL: 0.0697 | AENT: 0.4367
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.2

| [33mtrain[0m | E: 54 | S: 2754 | R: 2.0022 | D: 24.1 s | BR: 0.0315 | ALOSS: -1.7725 | CLOSS: 0.0592 | TLOSS: 0.0683 | TVAL: 0.0528 | AENT: 0.2947
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 55 | S: 2805 | R: 0.8799 | D: 24.3 s | BR: 0.0374 | ALOSS: -1.6242 | CLOSS: 0.0582 | TLOSS: 0.0686 | TVAL: 0.0522 | AENT: 0.3157
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 56 | S: 2856 | R: 2.9915 | D: 24.3 s | BR: 0.0419 | ALOSS: -1.6821 | CLOSS: 0.0616 | TLOSS: 0.0682 | TVAL: 0.0516 | AENT: 0.3223
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 57 | S: 2907 | R: 3.1595 | D: 24.1 s

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [32meval[0m  | E: 79 | S: 4026 | R: 0.8622
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 80 | S: 4077 | R: 13.3108 | D: 101.4 s | BR: 0.0603 | ALOSS: -2.6545 | CLOSS: 0.2777 | TLOSS: 0.0327 | TVAL: 0.0402 | AENT: -0.1858
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 81 | S: 4122 | R: 10.0840 | D: 23.3 s | BR: 0.0570 | ALOSS: -3.0797 | CLOSS: 0.3280 | TLOSS: 0.0306 | TVAL: 0.0399 | AENT: -0.2339
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000

| [33mtrain[0m | E: 104 | S: 5277 | R: 12.4332 | D: 25.4 s | BR: 0.1063 | ALOSS: -4.4360 | CLOSS: 0.5145 | TLOSS: -0.0050 | TVAL: 0.0390 | AENT: -1.1280
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 105 | S: 5328 | R: 13.4225 | D: 25.5 s | BR: 0.1206 | ALOSS: -4.3206 | CLOSS: 0.4850 | TLOSS: -0.0055 | TVAL: 0.0390 | AENT: -1.1412
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 106 | S: 5379 | R: 12.8485 | D: 25.6 s | BR: 0.1150 | ALOSS: -4.5464 | CLOSS: 0.5949 | TLOSS: -0.0050 | TVAL: 0.0391 | AENT: -1.1280
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 107 | S: 5430 | R: 13.10

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 130 | S: 6603 | R: 13.3306 | D: 25.8 s | BR: 0.1841 | ALOSS: -5.4150 | CLOSS: 0.4713 | TLOSS: -0.0130 | TVAL: 0.0448 | AENT: -1.2896
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 131 | S: 6654 | R: 14.5714 | D: 25.7 s | BR: 0.1762 | ALOSS: -5.1775 | CLOSS: 0.4830 | TLOSS: -0.0122 | TVAL: 0.0452 | AENT: -1.2703
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 132 | S: 6705 | R: 10.9374 | D: 25.6 s | BR: 0.1911 | ALOSS: -5.3044 | CLOSS: 0.5018 | TLOSS: -0.0137 | TVAL: 0.0457 | AENT: -1.2999
0
[0.97686249 2.00028078 2.99999994] [ 9.99980

| [33mtrain[0m | E: 155 | S: 7878 | R: 14.4713 | D: 25.4 s | BR: 0.2376 | ALOSS: -5.3747 | CLOSS: 0.2418 | TLOSS: -0.0026 | TVAL: 0.0568 | AENT: -1.0458
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 156 | S: 7929 | R: 13.5916 | D: 25.6 s | BR: 0.2429 | ALOSS: -4.9968 | CLOSS: 0.2873 | TLOSS: -0.0027 | TVAL: 0.0569 | AENT: -1.0467
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 157 | S: 7980 | R: 12.8056 | D: 25.6 s | BR: 0.2383 | ALOSS: -5.1349 | CLOSS: 0.2503 | TLOSS: -0.0077 | TVAL: 0.0572 | AENT: -1.1352
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 158 | S: 8031 | R: 15.72

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 180 | S: 9153 | R: 9.1330 | D: 25.6 s | BR: 0.2745 | ALOSS: -5.4223 | CLOSS: 0.1002 | TLOSS: 0.0024 | TVAL: 0.0583 | AENT: -0.9587
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 181 | S: 9204 | R: 10.7365 | D: 25.6 s | BR: 0.2773 | ALOSS: -5.2612 | CLOSS: 0.1002 | TLOSS: 0.0048 | TVAL: 0.0580 | AENT: -0.9176
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 182 | S: 9255 | R: 13.1172 | D: 25.6 s | BR: 0.2664 | ALOSS: -5.3989 | CLOSS: 0.1022 | TLOSS: 0.0037 | TVAL: 0.0576 | AENT: -0.9362
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e

| [33mtrain[0m | E: 205 | S: 10428 | R: 12.2228 | D: 25.9 s | BR: 0.2538 | ALOSS: -5.6235 | CLOSS: 0.1001 | TLOSS: 0.0024 | TVAL: 0.0537 | AENT: -0.9555
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 206 | S: 10479 | R: 10.0507 | D: 25.8 s | BR: 0.2617 | ALOSS: -5.6417 | CLOSS: 0.1038 | TLOSS: 0.0026 | TVAL: 0.0535 | AENT: -0.9517
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 207 | S: 10530 | R: 13.0329 | D: 25.7 s | BR: 0.2727 | ALOSS: -5.7441 | CLOSS: 0.1242 | TLOSS: 0.0014 | TVAL: 0.0532 | AENT: -0.9736
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 208 | S: 10581 | R: 12.2

  


| [32meval[0m  | E: 217 | S: 11040 | R: 0.8691
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 218 | S: 11091 | R: 9.3317 | D: 100.7 s | BR: 0.2570 | ALOSS: -5.5191 | CLOSS: 0.1245 | TLOSS: 0.0034 | TVAL: 0.0502 | AENT: -0.9320
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 219 | S: 11142 | R: 11.8174 | D: 25.5 s | BR: 0.2563 | ALOSS: -5.9892 | CLOSS: 0.1550 | TLOSS: 0.0019 | TVAL: 0.0499 | AENT: -0.9613
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 220 | S: 11193 | R: 11.2948 | D: 25.7 s | BR: 0.2439 | ALOSS: -5.4109 | CLOSS: 0.0795 | TLOSS: 0.0064 | TVAL: 0.0495 | AENT: -0.87

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 243 | S: 12366 | R: 7.0811 | D: 25.9 s | BR: 0.2413 | ALOSS: -5.7457 | CLOSS: 0.1511 | TLOSS: 0.0038 | TVAL: 0.0393 | AENT: -0.9029
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 244 | S: 12417 | R: 11.2564 | D: 25.6 s | BR: 0.2490 | ALOSS: -5.4488 | CLOSS: 0.1302 | TLOSS: 0.0020 | TVAL: 0.0390 | AENT: -0.9491
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 245 | S: 12468 | R: 12.5213 | D: 25.8 s | BR: 0.2452 | ALOSS: -5.6460 | CLOSS: 0.1258 | TLOSS: 0.0019 | TVAL: 0.0388 | AENT: -0.9508
0
[0.97686249 2.00028078 2.99999994] [ 9.999806

| [33mtrain[0m | E: 268 | S: 13641 | R: 13.8148 | D: 25.7 s | BR: 0.2307 | ALOSS: -5.6773 | CLOSS: 0.1251 | TLOSS: -0.0002 | TVAL: 0.0353 | AENT: -1.0064
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 269 | S: 13692 | R: 14.1050 | D: 25.8 s | BR: 0.2303 | ALOSS: -5.3958 | CLOSS: 0.1310 | TLOSS: 0.0026 | TVAL: 0.0352 | AENT: -0.9268
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 270 | S: 13743 | R: 13.1610 | D: 25.6 s | BR: 0.2509 | ALOSS: -5.8693 | CLOSS: 0.1319 | TLOSS: 0.0002 | TVAL: 0.0350 | AENT: -0.9933
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 271 | S: 13794 | R: 13.

| [33mtrain[0m | E: 293 | S: 14916 | R: 14.4917 | D: 25.8 s | BR: 0.2569 | ALOSS: -5.8059 | CLOSS: 0.0965 | TLOSS: -0.0010 | TVAL: 0.0335 | AENT: -1.0296
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 294 | S: 14967 | R: 13.7509 | D: 25.8 s | BR: 0.2453 | ALOSS: -5.6725 | CLOSS: 0.1116 | TLOSS: 0.0027 | TVAL: 0.0335 | AENT: -0.9186
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 295 | S: 15018 | R: 14.1250 | D: 25.7 s | BR: 0.2490 | ALOSS: -5.9222 | CLOSS: 0.1210 | TLOSS: 0.0015 | TVAL: 0.0332 | AENT: -0.9537
Evaluating episode 0 ....
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [32meval[0m  | 

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 318 | S: 16191 | R: 14.2453 | D: 25.7 s | BR: 0.2586 | ALOSS: -5.5220 | CLOSS: 0.0977 | TLOSS: -0.0002 | TVAL: 0.0319 | AENT: -1.0060
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 319 | S: 16242 | R: 13.2430 | D: 25.7 s | BR: 0.2456 | ALOSS: -5.7600 | CLOSS: 0.1038 | TLOSS: 0.0012 | TVAL: 0.0319 | AENT: -0.9630
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 320 | S: 16293 | R: 12.2130 | D: 25.6 s | BR: 0.2568 | ALOSS: -5.7182 | CLOSS: 0.0923 | TLOSS: 0.0006 | TVAL: 0.0318 | AENT: -0.9805
0
[0.97686249 2.00028078 2.99999994] [ 9.9998

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 343 | S: 17466 | R: 12.5452 | D: 25.6 s | BR: 0.2576 | ALOSS: -5.8764 | CLOSS: 0.0627 | TLOSS: 0.0006 | TVAL: 0.0315 | AENT: -0.9813
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 344 | S: 17517 | R: 10.4265 | D: 25.6 s | BR: 0.2665 | ALOSS: -5.8563 | CLOSS: 0.0557 | TLOSS: 0.0006 | TVAL: 0.0315 | AENT: -0.9817
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 345 | S: 17568 | R: 13.6315 | D: 25.6 s | BR: 0.2631 | ALOSS: -5.8842 | CLOSS: 0.0556 | TLOSS: -0.0006 | TVAL: 0.0315 | AENT: -1.0188
0
[0.97686249 2.00028078 2.99999994] [ 9.9998

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 368 | S: 18741 | R: 12.0246 | D: 25.6 s | BR: 0.2631 | ALOSS: -5.8918 | CLOSS: 0.0606 | TLOSS: 0.0015 | TVAL: 0.0295 | AENT: -0.9479
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 369 | S: 18792 | R: 13.0462 | D: 25.7 s | BR: 0.2669 | ALOSS: -6.1093 | CLOSS: 0.0437 | TLOSS: -0.0014 | TVAL: 0.0294 | AENT: -1.0474
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 370 | S: 18843 | R: 14.8193 | D: 25.6 s | BR: 0.2636 | ALOSS: -5.8815 | CLOSS: 0.0403 | TLOSS: -0.0011 | TVAL: 0.0296 | AENT: -1.0384
0
[0.97686249 2.00028078 2.99999994] [ 9.999

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 393 | S: 20016 | R: 13.2091 | D: 26.0 s | BR: 0.2681 | ALOSS: -6.0106 | CLOSS: 0.0288 | TLOSS: 0.0007 | TVAL: 0.0294 | AENT: -0.9750
Evaluating episode 0 ....
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [32meval[0m  | E: 393 | S: 20016 | R: 14.6433
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 394 | S: 20067 | R: 15.1529 | D: 101.1 s | BR: 0.2652 | ALOSS: -5.9175 | CLOSS: 0.0485 | TLOSS: 0.0017 | TVAL: 0.0293 | AENT: -0.9424
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6

| [33mtrain[0m | E: 417 | S: 21240 | R: 14.2046 | D: 25.4 s | BR: 0.2639 | ALOSS: -5.9980 | CLOSS: 0.0622 | TLOSS: 0.0007 | TVAL: 0.0283 | AENT: -0.9760
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 418 | S: 21291 | R: 13.0032 | D: 25.4 s | BR: 0.2373 | ALOSS: -5.9706 | CLOSS: 0.0493 | TLOSS: 0.0010 | TVAL: 0.0282 | AENT: -0.9663
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 419 | S: 21342 | R: 13.1355 | D: 25.4 s | BR: 0.2575 | ALOSS: -5.8661 | CLOSS: 0.0519 | TLOSS: -0.0005 | TVAL: 0.0281 | AENT: -1.0172
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 420 | S: 21393 | R: 11.

| [33mtrain[0m | E: 442 | S: 22515 | R: 13.2269 | D: 25.4 s | BR: 0.2632 | ALOSS: -6.1094 | CLOSS: 0.0494 | TLOSS: -0.0008 | TVAL: 0.0294 | AENT: -1.0272
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 443 | S: 22566 | R: 13.3878 | D: 25.6 s | BR: 0.2746 | ALOSS: -5.9234 | CLOSS: 0.0483 | TLOSS: 0.0003 | TVAL: 0.0295 | AENT: -0.9914
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 444 | S: 22617 | R: 12.9270 | D: 25.7 s | BR: 0.2654 | ALOSS: -5.8955 | CLOSS: 0.0664 | TLOSS: 0.0015 | TVAL: 0.0294 | AENT: -0.9504
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 445 | S: 22668 | R: 12.

| [33mtrain[0m | E: 467 | S: 23790 | R: 14.6344 | D: 25.9 s | BR: 0.2575 | ALOSS: -6.0918 | CLOSS: 0.0589 | TLOSS: 0.0002 | TVAL: 0.0283 | AENT: -0.9914
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 468 | S: 23841 | R: 13.1492 | D: 26.1 s | BR: 0.2714 | ALOSS: -6.0538 | CLOSS: 0.0584 | TLOSS: 0.0005 | TVAL: 0.0283 | AENT: -0.9819
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 469 | S: 23892 | R: 11.1178 | D: 26.0 s | BR: 0.2599 | ALOSS: -6.0571 | CLOSS: 0.0616 | TLOSS: 0.0010 | TVAL: 0.0283 | AENT: -0.9652
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 470 | S: 23943 | R: 12.6

| [32meval[0m  | E: 491 | S: 25014 | R: 14.6779
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 492 | S: 25065 | R: 12.9447 | D: 101.8 s | BR: 0.2638 | ALOSS: -6.3237 | CLOSS: 0.0606 | TLOSS: -0.0014 | TVAL: 0.0284 | AENT: -1.0500
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 493 | S: 25116 | R: 14.7524 | D: 25.8 s | BR: 0.2644 | ALOSS: -6.1633 | CLOSS: 0.0650 | TLOSS: 0.0015 | TVAL: 0.0284 | AENT: -0.9461
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 494 | S: 25167 | R: 10.3756 | D: 26.0 s | BR: 0.2521 | ALOSS: -6.1416 | CLOSS: 0.0600 | TLOSS: 0.0010 | TVAL: 0.0283 | AENT: -0

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 517 | S: 26340 | R: 11.1290 | D: 26.0 s | BR: 0.2758 | ALOSS: -6.0941 | CLOSS: 0.0469 | TLOSS: 0.0016 | TVAL: 0.0271 | AENT: -0.9423
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 518 | S: 26391 | R: 15.1205 | D: 25.7 s | BR: 0.2732 | ALOSS: -6.2285 | CLOSS: 0.0511 | TLOSS: 0.0007 | TVAL: 0.0270 | AENT: -0.9745
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 519 | S: 26442 | R: 14.8525 | D: 26.0 s | BR: 0.2575 | ALOSS: -6.1908 | CLOSS: 0.0516 | TLOSS: 0.0002 | TVAL: 0.0269 | AENT: -0.9933
0
[0.97686249 2.00028078 2.99999994] [ 9.99980

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 542 | S: 27615 | R: 14.8210 | D: 26.0 s | BR: 0.2571 | ALOSS: -6.4835 | CLOSS: 0.0624 | TLOSS: -0.0022 | TVAL: 0.0281 | AENT: -1.0791
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 543 | S: 27666 | R: 14.2844 | D: 25.7 s | BR: 0.2705 | ALOSS: -6.1470 | CLOSS: 0.0592 | TLOSS: -0.0019 | TVAL: 0.0283 | AENT: -1.0664
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 544 | S: 27717 | R: 14.8078 | D: 25.9 s | BR: 0.2822 | ALOSS: -6.3059 | CLOSS: 0.0525 | TLOSS: -0.0016 | TVAL: 0.0285 | AENT: -1.0578
0
[0.97686249 2.00028078 2.99999994] [ 9.99

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 567 | S: 28890 | R: 14.9843 | D: 26.2 s | BR: 0.2773 | ALOSS: -6.1521 | CLOSS: 0.0569 | TLOSS: 0.0004 | TVAL: 0.0310 | AENT: -0.9877
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 568 | S: 28941 | R: 15.8326 | D: 25.8 s | BR: 0.2828 | ALOSS: -6.2676 | CLOSS: 0.0559 | TLOSS: -0.0007 | TVAL: 0.0310 | AENT: -1.0226
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 569 | S: 28992 | R: 15.2423 | D: 26.1 s | BR: 0.2717 | ALOSS: -6.2223 | CLOSS: 0.0524 | TLOSS: 0.0005 | TVAL: 0.0311 | AENT: -0.9828
0
[0.97686249 2.00028078 2.99999994] [ 9.9998

| [33mtrain[0m | E: 591 | S: 30114 | R: 14.4898 | D: 26.1 s | BR: 0.2652 | ALOSS: -6.2345 | CLOSS: 0.0676 | TLOSS: 0.0012 | TVAL: 0.0308 | AENT: -0.9609
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 592 | S: 30165 | R: 15.8683 | D: 25.8 s | BR: 0.2804 | ALOSS: -6.3589 | CLOSS: 0.0573 | TLOSS: -0.0002 | TVAL: 0.0306 | AENT: -1.0072
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 593 | S: 30216 | R: 15.2606 | D: 25.9 s | BR: 0.2918 | ALOSS: -6.3382 | CLOSS: 0.0484 | TLOSS: 0.0001 | TVAL: 0.0307 | AENT: -0.9956
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 594 | S: 30267 | R: 13.

| [33mtrain[0m | E: 616 | S: 31389 | R: 12.5761 | D: 26.0 s | BR: 0.2866 | ALOSS: -6.4544 | CLOSS: 0.0638 | TLOSS: -0.0015 | TVAL: 0.0308 | AENT: -1.0476
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 617 | S: 31440 | R: 14.8697 | D: 25.8 s | BR: 0.2769 | ALOSS: -6.0806 | CLOSS: 0.0387 | TLOSS: 0.0008 | TVAL: 0.0309 | AENT: -0.9751
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 618 | S: 31491 | R: 13.8557 | D: 26.3 s | BR: 0.2678 | ALOSS: -6.3072 | CLOSS: 0.0392 | TLOSS: -0.0002 | TVAL: 0.0308 | AENT: -1.0080
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 619 | S: 31542 | R: 15

| [33mtrain[0m | E: 641 | S: 32664 | R: 13.3737 | D: 25.9 s | BR: 0.2716 | ALOSS: -6.5312 | CLOSS: 0.0467 | TLOSS: 0.0015 | TVAL: 0.0280 | AENT: -0.9454
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 642 | S: 32715 | R: 12.5395 | D: 26.0 s | BR: 0.2969 | ALOSS: -6.4671 | CLOSS: 0.0427 | TLOSS: 0.0015 | TVAL: 0.0278 | AENT: -0.9453
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 643 | S: 32766 | R: 12.1210 | D: 26.0 s | BR: 0.2772 | ALOSS: -6.4055 | CLOSS: 0.0468 | TLOSS: 0.0026 | TVAL: 0.0276 | AENT: -0.9054
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 644 | S: 32817 | R: 14.4

| [33mtrain[0m | E: 666 | S: 33939 | R: 13.9001 | D: 25.9 s | BR: 0.2677 | ALOSS: -6.1664 | CLOSS: 0.0492 | TLOSS: 0.0026 | TVAL: 0.0269 | AENT: -0.9025
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 667 | S: 33990 | R: 10.9524 | D: 26.0 s | BR: 0.2819 | ALOSS: -6.4728 | CLOSS: 0.0578 | TLOSS: -0.0013 | TVAL: 0.0268 | AENT: -1.0490
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 668 | S: 34041 | R: 14.4671 | D: 26.0 s | BR: 0.2795 | ALOSS: -6.4889 | CLOSS: 0.0478 | TLOSS: -0.0003 | TVAL: 0.0268 | AENT: -1.0125
Evaluating episode 0 ....
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [32meval[0m  |

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 691 | S: 35214 | R: 15.3330 | D: 26.0 s | BR: 0.2785 | ALOSS: -6.0867 | CLOSS: 0.0429 | TLOSS: -0.0001 | TVAL: 0.0282 | AENT: -1.0035
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 692 | S: 35265 | R: 15.3138 | D: 25.9 s | BR: 0.2710 | ALOSS: -6.3448 | CLOSS: 0.0563 | TLOSS: -0.0021 | TVAL: 0.0283 | AENT: -1.0737
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 693 | S: 35316 | R: 14.5924 | D: 26.0 s | BR: 0.2814 | ALOSS: -6.4720 | CLOSS: 0.0756 | TLOSS: -0.0010 | TVAL: 0.0284 | AENT: -1.0354
0
[0.97686249 2.00028078 2.99999994] [ 9.99

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 716 | S: 36489 | R: 14.5442 | D: 25.9 s | BR: 0.2706 | ALOSS: -6.2379 | CLOSS: 0.0570 | TLOSS: -0.0004 | TVAL: 0.0302 | AENT: -1.0136
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 717 | S: 36540 | R: 13.9538 | D: 26.2 s | BR: 0.2770 | ALOSS: -6.4464 | CLOSS: 0.0524 | TLOSS: -0.0005 | TVAL: 0.0302 | AENT: -1.0160
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 718 | S: 36591 | R: 13.9629 | D: 26.1 s | BR: 0.2716 | ALOSS: -6.3198 | CLOSS: 0.0471 | TLOSS: 0.0011 | TVAL: 0.0301 | AENT: -0.9640
0
[0.97686249 2.00028078 2.99999994] [ 9.999

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 741 | S: 37764 | R: 13.0445 | D: 26.0 s | BR: 0.2903 | ALOSS: -6.5216 | CLOSS: 0.0306 | TLOSS: -0.0005 | TVAL: 0.0302 | AENT: -1.0151
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 742 | S: 37815 | R: 15.0761 | D: 26.1 s | BR: 0.2720 | ALOSS: -6.2862 | CLOSS: 0.0333 | TLOSS: 0.0007 | TVAL: 0.0302 | AENT: -0.9760
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 743 | S: 37866 | R: 14.4185 | D: 26.1 s | BR: 0.2741 | ALOSS: -6.3338 | CLOSS: 0.0269 | TLOSS: -0.0000 | TVAL: 0.0303 | AENT: -1.0010
0
[0.97686249 2.00028078 2.99999994] [ 9.999

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 766 | S: 39039 | R: 14.1810 | D: 25.9 s | BR: 0.2739 | ALOSS: -6.5502 | CLOSS: 0.0480 | TLOSS: -0.0002 | TVAL: 0.0284 | AENT: -1.0062
Evaluating episode 0 ....
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [32meval[0m  | E: 766 | S: 39039 | R: 17.5274
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 767 | S: 39090 | R: 13.8969 | D: 102.1 s | BR: 0.2831 | ALOSS: -6.3698 | CLOSS: 0.0297 | TLOSS: 0.0003 | TVAL: 0.0285 | AENT: -0.9911
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [

| [33mtrain[0m | E: 790 | S: 40263 | R: 13.6236 | D: 26.0 s | BR: 0.2800 | ALOSS: -6.4480 | CLOSS: 0.0334 | TLOSS: -0.0010 | TVAL: 0.0297 | AENT: -1.0325
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 791 | S: 40314 | R: 15.1945 | D: 25.8 s | BR: 0.2654 | ALOSS: -6.1303 | CLOSS: 0.0338 | TLOSS: -0.0017 | TVAL: 0.0299 | AENT: -1.0583
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 792 | S: 40365 | R: 13.6950 | D: 26.0 s | BR: 0.2734 | ALOSS: -6.2192 | CLOSS: 0.0273 | TLOSS: -0.0023 | TVAL: 0.0302 | AENT: -1.0777
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 793 | S: 40416 | R: 1

| [33mtrain[0m | E: 815 | S: 41538 | R: 15.0019 | D: 26.1 s | BR: 0.2776 | ALOSS: -6.2759 | CLOSS: 0.0330 | TLOSS: 0.0013 | TVAL: 0.0305 | AENT: -0.9571
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 816 | S: 41589 | R: 15.1975 | D: 26.0 s | BR: 0.2859 | ALOSS: -6.3221 | CLOSS: 0.0320 | TLOSS: -0.0008 | TVAL: 0.0305 | AENT: -1.0248
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 817 | S: 41640 | R: 14.3494 | D: 26.1 s | BR: 0.2712 | ALOSS: -6.2395 | CLOSS: 0.0250 | TLOSS: -0.0017 | TVAL: 0.0306 | AENT: -1.0542
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 818 | S: 41691 | R: 15

| [33mtrain[0m | E: 840 | S: 42813 | R: 15.2645 | D: 26.0 s | BR: 0.2667 | ALOSS: -6.3019 | CLOSS: 0.0399 | TLOSS: -0.0012 | TVAL: 0.0299 | AENT: -1.0406
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 841 | S: 42864 | R: 15.5669 | D: 25.9 s | BR: 0.2835 | ALOSS: -6.2871 | CLOSS: 0.0544 | TLOSS: -0.0001 | TVAL: 0.0299 | AENT: -1.0020
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 842 | S: 42915 | R: 15.1006 | D: 26.1 s | BR: 0.2649 | ALOSS: -6.2791 | CLOSS: 0.0406 | TLOSS: 0.0014 | TVAL: 0.0299 | AENT: -0.9534
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 843 | S: 42966 | R: 14

| [32meval[0m  | E: 864 | S: 44037 | R: 16.5522
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 865 | S: 44088 | R: 14.9676 | D: 101.3 s | BR: 0.2856 | ALOSS: -6.2922 | CLOSS: 0.0299 | TLOSS: 0.0006 | TVAL: 0.0296 | AENT: -0.9814
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 866 | S: 44139 | R: 15.2348 | D: 24.5 s | BR: 0.2807 | ALOSS: -6.3812 | CLOSS: 0.0298 | TLOSS: 0.0010 | TVAL: 0.0295 | AENT: -0.9667
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 867 | S: 44190 | R: 12.9010 | D: 24.6 s | BR: 0.2950 | ALOSS: -6.3223 | CLOSS: 0.0271 | TLOSS: -0.0007 | TVAL: 0.0295 | AENT: -1

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 890 | S: 45363 | R: 13.2155 | D: 24.5 s | BR: 0.2680 | ALOSS: -6.3569 | CLOSS: 0.0262 | TLOSS: 0.0008 | TVAL: 0.0285 | AENT: -0.9720
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 891 | S: 45414 | R: 14.0994 | D: 24.5 s | BR: 0.2855 | ALOSS: -6.5204 | CLOSS: 0.0291 | TLOSS: -0.0017 | TVAL: 0.0285 | AENT: -1.0614
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 892 | S: 45465 | R: 14.7305 | D: 24.6 s | BR: 0.2875 | ALOSS: -6.3749 | CLOSS: 0.0304 | TLOSS: 0.0018 | TVAL: 0.0285 | AENT: -0.9365
0
[0.97686249 2.00028078 2.99999994] [ 9.9998

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 915 | S: 46638 | R: 14.5475 | D: 24.7 s | BR: 0.2840 | ALOSS: -6.4021 | CLOSS: 0.0292 | TLOSS: -0.0023 | TVAL: 0.0281 | AENT: -1.0806
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 916 | S: 46689 | R: 15.0265 | D: 24.7 s | BR: 0.2789 | ALOSS: -6.2836 | CLOSS: 0.0300 | TLOSS: -0.0010 | TVAL: 0.0284 | AENT: -1.0350
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 917 | S: 46740 | R: 13.7141 | D: 24.8 s | BR: 0.2909 | ALOSS: -6.3119 | CLOSS: 0.0293 | TLOSS: 0.0002 | TVAL: 0.0284 | AENT: -0.9938
0
[0.97686249 2.00028078 2.99999994] [ 9.999

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 940 | S: 47913 | R: 14.6791 | D: 24.9 s | BR: 0.2970 | ALOSS: -6.3708 | CLOSS: 0.0298 | TLOSS: -0.0017 | TVAL: 0.0287 | AENT: -1.0597
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 941 | S: 47964 | R: 15.6237 | D: 25.0 s | BR: 0.2959 | ALOSS: -6.2973 | CLOSS: 0.0279 | TLOSS: -0.0007 | TVAL: 0.0288 | AENT: -1.0243
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 942 | S: 48015 | R: 12.2318 | D: 25.1 s | BR: 0.2870 | ALOSS: -6.2376 | CLOSS: 0.0290 | TLOSS: -0.0010 | TVAL: 0.0290 | AENT: -1.0361
Evaluating episode 0 ....
0
[0.97686249 2.0

| [33mtrain[0m | E: 964 | S: 49137 | R: 14.8321 | D: 24.3 s | BR: 0.2897 | ALOSS: -6.5144 | CLOSS: 0.0319 | TLOSS: -0.0003 | TVAL: 0.0287 | AENT: -1.0089
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 965 | S: 49188 | R: 13.9543 | D: 24.2 s | BR: 0.2723 | ALOSS: -6.2070 | CLOSS: 0.0265 | TLOSS: 0.0006 | TVAL: 0.0288 | AENT: -0.9787
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 966 | S: 49239 | R: 14.5759 | D: 24.2 s | BR: 0.2851 | ALOSS: -6.2989 | CLOSS: 0.0256 | TLOSS: -0.0000 | TVAL: 0.0287 | AENT: -1.0012
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 967 | S: 49290 | R: 13

| [33mtrain[0m | E: 989 | S: 50412 | R: 15.6298 | D: 24.4 s | BR: 0.2873 | ALOSS: -6.3884 | CLOSS: 0.0335 | TLOSS: 0.0002 | TVAL: 0.0279 | AENT: -0.9913
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 990 | S: 50463 | R: 13.0816 | D: 24.3 s | BR: 0.2706 | ALOSS: -6.2787 | CLOSS: 0.0344 | TLOSS: 0.0009 | TVAL: 0.0279 | AENT: -0.9663
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 991 | S: 50514 | R: 14.4491 | D: 24.3 s | BR: 0.3031 | ALOSS: -6.2202 | CLOSS: 0.0389 | TLOSS: 0.0009 | TVAL: 0.0278 | AENT: -0.9673
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 992 | S: 50565 | R: 14.4

| [33mtrain[0m | E: 1014 | S: 51687 | R: 14.6600 | D: 24.4 s | BR: 0.2742 | ALOSS: -6.3264 | CLOSS: 0.0343 | TLOSS: 0.0006 | TVAL: 0.0271 | AENT: -0.9770
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1015 | S: 51738 | R: 14.0716 | D: 24.4 s | BR: 0.2847 | ALOSS: -6.1531 | CLOSS: 0.0380 | TLOSS: 0.0014 | TVAL: 0.0270 | AENT: -0.9492
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1016 | S: 51789 | R: 14.6764 | D: 24.7 s | BR: 0.2813 | ALOSS: -6.2257 | CLOSS: 0.0396 | TLOSS: 0.0003 | TVAL: 0.0268 | AENT: -0.9900
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1017 | S: 51840 | R: 

| [33mtrain[0m | E: 1039 | S: 52962 | R: 12.9214 | D: 24.1 s | BR: 0.2906 | ALOSS: -6.5004 | CLOSS: 0.0319 | TLOSS: 0.0008 | TVAL: 0.0253 | AENT: -0.9678
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1040 | S: 53013 | R: 14.5681 | D: 24.2 s | BR: 0.2873 | ALOSS: -6.2621 | CLOSS: 0.0353 | TLOSS: 0.0009 | TVAL: 0.0252 | AENT: -0.9663
Evaluating episode 0 ....
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [32meval[0m  | E: 1040 | S: 53013 | R: 17.1431
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1041 | S: 53064 | R: 13.4506 | D: 99.0 s | BR: 0.2876 | ALOSS: -6.3791 | CLOSS: 0.0333 | TLOSS: -0.0

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1064 | S: 54237 | R: 12.3020 | D: 24.8 s | BR: 0.2881 | ALOSS: -6.0684 | CLOSS: 0.0433 | TLOSS: -0.0012 | TVAL: 0.0247 | AENT: -1.0507
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1065 | S: 54288 | R: 13.7450 | D: 25.2 s | BR: 0.2820 | ALOSS: -6.3421 | CLOSS: 0.0475 | TLOSS: 0.0004 | TVAL: 0.0247 | AENT: -0.9820
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1066 | S: 54339 | R: 15.2879 | D: 24.6 s | BR: 0.2693 | ALOSS: -6.0872 | CLOSS: 0.0389 | TLOSS: 0.0002 | TVAL: 0.0246 | AENT: -0.9902
0
[0.97686249 2.00028078 2.99999994] [ 9.9

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1089 | S: 55512 | R: 14.1310 | D: 24.8 s | BR: 0.2770 | ALOSS: -6.3319 | CLOSS: 0.0334 | TLOSS: -0.0009 | TVAL: 0.0252 | AENT: -1.0375
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1090 | S: 55563 | R: 14.1781 | D: 24.4 s | BR: 0.2755 | ALOSS: -6.3679 | CLOSS: 0.0286 | TLOSS: 0.0004 | TVAL: 0.0253 | AENT: -0.9835
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1091 | S: 55614 | R: 13.5376 | D: 24.1 s | BR: 0.2650 | ALOSS: -6.1404 | CLOSS: 0.0345 | TLOSS: 0.0029 | TVAL: 0.0252 | AENT: -0.8853
0
[0.97686249 2.00028078 2.99999994] [ 9.9

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1114 | S: 56787 | R: 14.5755 | D: 24.0 s | BR: 0.2613 | ALOSS: -6.0981 | CLOSS: 0.0307 | TLOSS: -0.0005 | TVAL: 0.0242 | AENT: -1.0208
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1115 | S: 56838 | R: 11.3182 | D: 24.1 s | BR: 0.2678 | ALOSS: -6.2774 | CLOSS: 0.0413 | TLOSS: -0.0013 | TVAL: 0.0242 | AENT: -1.0537
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1116 | S: 56889 | R: 14.5907 | D: 24.1 s | BR: 0.2564 | ALOSS: -6.1033 | CLOSS: 0.0299 | TLOSS: -0.0008 | TVAL: 0.0243 | AENT: -1.0345
0
[0.97686249 2.00028078 2.99999994] [ 9

0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [32meval[0m  | E: 1138 | S: 58011 | R: 17.5781
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1139 | S: 58062 | R: 14.2207 | D: 97.0 s | BR: 0.2725 | ALOSS: -6.0076 | CLOSS: 0.0329 | TLOSS: 0.0014 | TVAL: 0.0253 | AENT: -0.9434
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1140 | S: 58113 | R: 14.8922 | D: 23.8 s | BR: 0.2818 | ALOSS: -6.0736 | CLOSS: 0.0454 | TLOSS: -0.0002 | TVAL: 0.0252 | AENT: -1.0063
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+0

| [33mtrain[0m | E: 1163 | S: 59286 | R: 14.9156 | D: 24.2 s | BR: 0.2707 | ALOSS: -5.9632 | CLOSS: 0.0398 | TLOSS: -0.0003 | TVAL: 0.0250 | AENT: -1.0123
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1164 | S: 59337 | R: 14.8078 | D: 24.2 s | BR: 0.2766 | ALOSS: -5.9727 | CLOSS: 0.0444 | TLOSS: 0.0004 | TVAL: 0.0250 | AENT: -0.9822
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1165 | S: 59388 | R: 15.7796 | D: 24.3 s | BR: 0.2838 | ALOSS: -6.0254 | CLOSS: 0.0475 | TLOSS: -0.0023 | TVAL: 0.0251 | AENT: -1.0913
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1166 | S: 59439 | R

| [33mtrain[0m | E: 1188 | S: 60561 | R: 14.6321 | D: 24.2 s | BR: 0.2663 | ALOSS: -6.0261 | CLOSS: 0.0364 | TLOSS: 0.0001 | TVAL: 0.0252 | AENT: -0.9967
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1189 | S: 60612 | R: 13.4286 | D: 24.1 s | BR: 0.2594 | ALOSS: -6.0718 | CLOSS: 0.0373 | TLOSS: 0.0002 | TVAL: 0.0252 | AENT: -0.9910
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1190 | S: 60663 | R: 13.3385 | D: 24.1 s | BR: 0.2701 | ALOSS: -6.0112 | CLOSS: 0.0481 | TLOSS: 0.0003 | TVAL: 0.0252 | AENT: -0.9879
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1191 | S: 60714 | R: 

| [33mtrain[0m | E: 1213 | S: 61836 | R: 13.0136 | D: 24.1 s | BR: 0.2809 | ALOSS: -6.1070 | CLOSS: 0.0365 | TLOSS: -0.0001 | TVAL: 0.0277 | AENT: -1.0051
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1214 | S: 61887 | R: 11.3266 | D: 24.0 s | BR: 0.2736 | ALOSS: -5.9720 | CLOSS: 0.0369 | TLOSS: -0.0010 | TVAL: 0.0278 | AENT: -1.0358
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1215 | S: 61938 | R: 13.4872 | D: 24.0 s | BR: 0.2813 | ALOSS: -5.8215 | CLOSS: 0.0472 | TLOSS: -0.0019 | TVAL: 0.0279 | AENT: -1.0668
0
[0.97686249 2.00028078 2.99999994] [ 9.99980604e-01 -6.22825676e-03  2.04357705e-06] [0.97686249 2.00028078 4.99999994] [6.123234e-17 0.000000e+00 1.000000e+00]
| [33mtrain[0m | E: 1216 | S: 61989 | 