In [3]:
from logging import config
import os
import datetime
from typing import Optional, Tuple
import json

os.environ["WANDB_NOTEBOOK_NAME"] = ".\Tianshow_Centralized_Training"

import numpy as np
import torch
from tianshou.data import Collector, VectorReplayBuffer, PrioritizedVectorReplayBuffer
from tianshou.env import DummyVectorEnv

from tianshou.policy import BasePolicy, DQNPolicy, MultiAgentPolicyManager, RandomPolicy, RainbowPolicy
from tianshou.trainer import OffpolicyTrainer
from torch.utils.tensorboard import SummaryWriter

from LOTZ.LOTZ_env import LeadingOnesTrailingZerosEnv

# from TaskAllocation.RL_Policies.MultiHead_SISL import MultiHead_SISL
from TaskAllocation.RL_Policies.DNN_LOTZ import DNN_LOTZ
from TaskAllocation.RL_Policies.MultiHead_LOTZ import MultiHead_LOTZ
from TaskAllocation.RL_Policies.ATT_LOTZ import ATT_LOTZ
# from TaskAllocation.RL_Policies.CNN_SISL import CNN_SISL

from TaskAllocation.RL_Policies.Custom_Classes import CustomNet
from TaskAllocation.RL_Policies.Custom_Classes import CustomCollector
from TaskAllocation.RL_Policies.Custom_Classes import CustomParallelToAECWrapper


#----------------------------------#
from tianshou.env.pettingzoo_env import PettingZooEnv
from typing import Any
from gymnasium import spaces

def _reset(self, *args: Any, **kwargs: Any) -> tuple[dict, dict]:
    """Reset the wrapped environment and describe the first agent's observation.

    All positional and keyword arguments are forwarded to ``self.env.reset``.

    Returns:
        A ``(observation_dict, info)`` pair. ``observation_dict`` always holds
        ``"agent_id"`` and ``"obs"``; it also holds ``"mask"`` when the
        environment supplies an ``"action_mask"`` or when ``self.action_space``
        is ``spaces.Discrete`` (in which case every action is marked legal).
    """
    self.env.reset(*args, **kwargs)

    # Only the observation and the info of the agent about to act are used.
    observation, _reward, _terminated, _truncated, info = self.env.last()
    current_agent = self.env.agent_selection

    # Environment provides its own legality mask: convert it to booleans.
    if isinstance(observation, dict) and "action_mask" in observation:
        masked_observation = {
            "agent_id": current_agent,
            "obs": observation["observation"],
            "mask": [flag == 1 for flag in observation["action_mask"]],
        }
        return masked_observation, info

    observation_dict = {"agent_id": current_agent, "obs": observation}
    if isinstance(self.action_space, spaces.Discrete):
        # No mask supplied: allow every discrete action.
        n_actions = self.env.action_space(self.env.agent_selection).n
        observation_dict["mask"] = [True] * n_actions

    return observation_dict, info
    
# Monkey-patch: every PettingZooEnv instance now uses the _reset defined above as reset().
PettingZooEnv.reset = _reset
    
# --- Add specific modification to tianshou -----#
import wandb
from tianshou.utils import WandbLogger
from tianshou.utils.logger.base import LOG_DATA_TYPE
def new_write(self, step_type: str, step: int, data: LOG_DATA_TYPE) -> None:
    """Send ``data`` to wandb with the step counter stored under ``step_type``.

    Note that ``data`` is modified in place: the ``step_type`` key is added
    (or overwritten) before the dict is handed to ``wandb.log``.
    """
    data.update({step_type: step})
    wandb.log(data)
# Monkey-patch: WandbLogger.write is replaced by new_write, which logs straight to wandb.
WandbLogger.write = new_write 

#from tianshou_DQN import train
# ---- Run identification ----
project = "LOTZ_Eval"
model = "MultiHead_LOTZ"  # network selected in _get_agents: "MultiHead_LOTZ", "DNN_LOTZ" or "ATT_LOTZ"
test_num = "_NOV01"
policyModel = "DQN"  # policy selected in _get_agents: "DQN" or "Rainbow"

train_env_num = 20
test_env_num = 20

name = model + test_num

# Checkpoint file names (load is only used when load_model is True).
load_policy_name = "policy_MultiHead_LOTZ_NOV01.pth"
save_policy_name = f"policy_{name}"
policy_path = "policy_LOTZ"

same_policy = True
load_model = False

# ---- Logging ----
now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
log_name = name + str(now)

log_path = os.path.join('./', "Logs", "dqn_sisl", log_name)

# ---- Environment configuration (keyword arguments of LeadingOnesTrailingZerosEnv) ----
LOTZ_Config = {
    "string_length": 256,
    "n_agents": 2,
    "seed": 0,
    "m_steps": 512,
    "sp": 0,
}

max_cycles = LOTZ_Config["m_steps"]
# Derived from the env config so the two values cannot drift apart.
n_agents = LOTZ_Config["n_agents"]

# ---- Policy hyper-parameters ----
dqn_params = {
    "discount_factor": 0.98,
    "estimation_step": 10,
    "target_update_freq": 100 * max_cycles,
    "optminizer": "Adam",
    "lr": 1e-2,
}

# ---- Trainer hyper-parameters ----
trainer_params = {
    "max_epoch": 1000,
    "step_per_epoch": 250 * max_cycles,
    "step_per_collect": max_cycles * 50,
    "episode_per_test": 20,
    "batch_size": max_cycles * 10,
    # Updates only run once a collect has finished, repeated as many times as
    # needed to reach this per-step ratio.
    "update_per_step": 1 / (max_cycles * 5),
    "tn_eps_max": 0.15,  # training epsilon at epoch 0 (see train_fn)
    "ts_eps_max": 0.0,   # epsilon used while testing (see test_fn)
}

# Merge into a NEW dict. The previous `runConfig = dqn_params` aliased the dict,
# so the subsequent updates silently polluted dqn_params with trainer/env keys.
runConfig = {**dqn_params, **trainer_params, **LOTZ_Config}

# ---- Output locations ----
model_load_path = os.path.join(policy_path, load_policy_name)
model_save_path = os.path.join(policy_path, save_policy_name)
os.makedirs(policy_path, exist_ok=True)
os.makedirs(log_path, exist_ok=True)

def _get_agents(
    agent_learn: Optional[BasePolicy] = None,
    agent_opponent: Optional[BasePolicy] = None,
    optim: Optional[torch.optim.Optimizer] = None,
    policy_load_path = None,
) -> Tuple[BasePolicy, torch.optim.Optimizer, list]:
    """Build the multi-agent policy manager in which all agents share one policy.

    Args:
        agent_learn: Policy shared by every agent. When ``None`` a network is
            built according to the module-level ``model`` and wrapped in the
            policy selected by ``policyModel``.
        agent_opponent: Accepted for signature compatibility; currently unused.
        optim: Optimizer for a newly built network. When ``None`` an Adam
            optimizer with ``dqn_params["lr"]`` is created. Ignored (and
            returned as given) when ``agent_learn`` is supplied.
        policy_load_path: Checkpoint to load when ``load_model`` is True.
            Defaults to the module-level ``model_load_path``.

    Returns:
        ``(policy, optim, agent_ids)`` where ``policy`` is the
        ``MultiAgentPolicyManager`` and ``agent_ids`` is ``env.agents``.

    Raises:
        ValueError: If ``model`` or ``policyModel`` names an unsupported option.
    """
    env = _get_env()
    agent_observation_space = env.observation_space
    action_shape = env.action_space

    print("Action_Shape: ", action_shape)
    print("agent_observation_space: ", agent_observation_space)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    if agent_learn is None:
        # --- network ---
        if model == "MultiHead_LOTZ":
            net = MultiHead_LOTZ(
                obs_shape=agent_observation_space,
                action_shape=action_shape,
                max_len=LOTZ_Config["string_length"],
                device=device,
            ).to(device)
        elif model == "DNN_LOTZ":
            net = DNN_LOTZ(
                obs_shape=agent_observation_space,
                action_shape=action_shape,
                device=device,
            ).to(device)
        elif model == "ATT_LOTZ":
            net = ATT_LOTZ(
                obs_shape=agent_observation_space,
                action_shape=action_shape,
                device=device,
            ).to(device)
        else:
            # Previously fell through and failed later with a NameError on `net`.
            raise ValueError(f"Unsupported model: {model!r}")

        # --- optimizer ---
        if optim is None:
            optim = torch.optim.Adam(
                net.parameters(), lr=dqn_params["lr"], weight_decay=0.0, amsgrad=False
            )

        # --- policy ---
        if policyModel == "DQN":
            agent_learn = DQNPolicy(
                model=net,
                optim=optim,
                action_space=action_shape,
                discount_factor=dqn_params["discount_factor"],
                estimation_step=dqn_params["estimation_step"],
                target_update_freq=dqn_params["target_update_freq"],
                reward_normalization=False,
                clip_loss_grad=False,
            )
        elif policyModel == "Rainbow":
            agent_learn = RainbowPolicy(
                model=net,  # already moved to `device` above
                optim=optim,
                action_space=action_shape,
                num_atoms=5,
                discount_factor=dqn_params["discount_factor"],
                estimation_step=dqn_params["estimation_step"],
                target_update_freq=dqn_params["target_update_freq"],
            )
        else:
            # Previously left agent_learn as None and crashed further down.
            raise ValueError(f"Unsupported policyModel: {policyModel!r}")

        if load_model is True:
            # Honour the explicit argument, falling back to the configured path.
            checkpoint_path = model_load_path if policy_load_path is None else policy_load_path
            # map_location lets a checkpoint saved on GPU be restored on a CPU-only host.
            agent_learn.load_state_dict(torch.load(checkpoint_path, map_location=device))
            print(f'Loaded-> {checkpoint_path}')

    # Built outside the branch above so a caller-supplied agent_learn works too
    # (it used to raise NameError because `agents` was only bound inside the branch).
    agents = [agent_learn for _ in range(len(env.agents))]

    policy = MultiAgentPolicyManager(policies=agents, env=env)

    return policy, optim, env.agents

def _get_env():
    """Create one LOTZ environment wrapped in ``PettingZooEnv``.

    Takes no arguments so it can be handed to ``DummyVectorEnv`` as an
    environment factory; every setting is read from ``LOTZ_Config``.
    """
    lotz_kwargs = {
        key: LOTZ_Config[key]
        for key in ("string_length", "n_agents", "seed", "m_steps", "sp")
    }
    return PettingZooEnv(LeadingOnesTrailingZerosEnv(**lotz_kwargs))

# Print the merged run configuration as indented JSON so it is recorded with the cell output.
print(json.dumps(runConfig, indent=4))

{
    "discount_factor": 0.98,
    "estimation_step": 10,
    "target_update_freq": 51200,
    "optminizer": "Adam",
    "lr": 0.01,
    "max_epoch": 1000,
    "step_per_epoch": 128000,
    "step_per_collect": 25600,
    "episode_per_test": 20,
    "batch_size": 5120,
    "update_per_step": 0.000390625,
    "tn_eps_max": 0.15,
    "ts_eps_max": 0.0,
    "string_length": 256,
    "n_agents": 2,
    "seed": 0,
    "m_steps": 512,
    "sp": 0
}




In [4]:
if __name__ == "__main__":
    # Training entry point: builds the vectorized environments, the shared
    # policy, the collectors and the logger, then runs the off-policy trainer.

    torch.set_grad_enabled(True)

    # ======== Step 1: Environment setup =========
    train_envs = DummyVectorEnv([_get_env for _ in range(train_env_num)])
    test_envs = DummyVectorEnv([_get_env for _ in range(test_env_num)])

    # Seed numpy, torch and both environment pools.
    seed = 0
    np.random.seed(seed)
    torch.manual_seed(seed)
    train_envs.seed(seed)
    test_envs.seed(seed)

    # ======== Step 2: Agent setup =========
    policy, optim, agents = _get_agents()

    agentsBuffer = PrioritizedVectorReplayBuffer(300_000, len(train_envs), alpha=0.6, beta=0.4)

    # ======== Step 3: Collector setup =========
    train_collector = Collector(
        policy,
        train_envs,
        agentsBuffer,
        exploration_noise=True,
    )
    test_collector = Collector(policy, test_envs, exploration_noise=False)

    # Pre-fill the buffer with one episode per training environment.
    print("Buffer Warming Up ")
    for _ in range(1):
        train_collector.collect(n_episode=train_env_num)
        print(".", end="")

    len_buffer = len(train_collector.buffer)
    print("\nBuffer Lenght: ", len_buffer )

    info = { "Buffer"  : "ReplayBuffer", " Warmup_ep" : len_buffer}

    # ======== wandb / tensorboard logging setup =========
    logger = WandbLogger(
        train_interval=runConfig["m_steps"] * runConfig["n_agents"],
        test_interval=1,
        update_interval=runConfig["m_steps"],
        save_interval=1,
        write_flush=True,
        project=project,
        name=log_name,
        entity=None,
        run_id=log_name,
        config=runConfig,
        monitor_gym=True,
    )

    writer = SummaryWriter(log_path)
    writer.add_text("args", str(runConfig))
    logger.load(writer)

    # ======== Step 4: Callback functions setup =========
    def _set_eps(epsilon):
        """Apply ``epsilon`` to the shared policy, or to every agent's policy."""
        if same_policy:
            # All agents reference the same policy object, so one call is enough.
            policy.policies[agents[0]].set_eps(epsilon)
        else:
            # Iterate the real agent ids instead of hard-coding names that do
            # not exist in this environment.
            for agent_id in agents:
                policy.policies[agent_id].set_eps(epsilon)

    def save_best_fn(policy):
        """Save the first agent's policy weights to ``model_save_path + '.pth'``."""
        torch.save(policy.policies[agents[0]].state_dict(), model_save_path + ".pth")
        print("Best Saved")

    def stop_fn(mean_rewards):
        """Stop only at a reward threshold set far above the rewards seen in this task."""
        return mean_rewards >= 99999939.0

    def train_fn(epoch, env_step):
        """Linearly decay epsilon from tn_eps_max towards tn_eps_max / 100 over max_epoch."""
        eps_max = trainer_params['tn_eps_max']
        eps_min = eps_max / 100
        epsilon = eps_max - (eps_max - eps_min) * (epoch / trainer_params['max_epoch'])
        _set_eps(epsilon)

    def test_fn(epoch, env_step):
        """Use the fixed test-time epsilon ts_eps_max."""
        _set_eps(trainer_params['ts_eps_max'])

    def reward_metric(rews):
        """Return the rewards unchanged."""
        return rews

    # ======== Step 5: Run the trainer =========
    offPolicyTrainer = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        buffer=agentsBuffer,
        max_epoch=trainer_params['max_epoch'],
        step_per_epoch=trainer_params['step_per_epoch'],
        step_per_collect=trainer_params['step_per_collect'],
        episode_per_test=trainer_params['episode_per_test'],
        batch_size=trainer_params['batch_size'],
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        update_per_step=trainer_params['update_per_step'],
        logger=logger,
        test_in_train=True,
        reward_metric=reward_metric,
        show_progress=True,
    )

    try:
        result = offPolicyTrainer.run()
    finally:
        # Close the tensorboard writer even if training raises or is interrupted.
        writer.close()

    print(f"\n==========Result==========\n{result}")
    print("\n(the trained policy can be accessed via policy.policies[agents[0]])")

Action_Shape:  Discrete(256)
agent_observation_space:  Box(0, 1, (256,), int32)
Buffer Warming Up 




.
Buffer Lenght:  10240
Best Saved


Epoch #1: 128001it [01:46, 1202.38it/s, agent0/loss=132099.617, env_step=128000, len=512, n/ep=40, n/st=25600, rew=-14297.56]                            


Epoch #1: test_reward: -14961.375000 ± 15242.429924, best_reward: -8391.325000 ± 14370.458109 in #0


Epoch #2: 128001it [01:44, 1224.56it/s, agent0/loss=88763.014, env_step=256000, len=512, n/ep=60, n/st=25600, rew=-13933.29]                            


Epoch #2: test_reward: -14114.750000 ± 14415.679352, best_reward: -8391.325000 ± 14370.458109 in #0


Epoch #3: 128001it [01:44, 1225.89it/s, agent0/loss=44455.418, env_step=384000, len=512, n/ep=40, n/st=25600, rew=-13808.44]                            


Epoch #3: test_reward: -13900.250000 ± 14190.153186, best_reward: -8391.325000 ± 14370.458109 in #0


Epoch #4: 128001it [01:45, 1212.41it/s, agent0/loss=44371.334, env_step=512000, len=512, n/ep=60, n/st=25600, rew=-13811.42]                            


Epoch #4: test_reward: -14672.125000 ± 14993.000571, best_reward: -8391.325000 ± 14370.458109 in #0


Epoch #5: 128001it [01:44, 1222.32it/s, agent0/loss=46421.872, env_step=640000, len=512, n/ep=40, n/st=25600, rew=-14196.81]                            


Epoch #5: test_reward: -14101.750000 ± 14406.036606, best_reward: -8391.325000 ± 14370.458109 in #0


Epoch #6: 128001it [01:44, 1230.28it/s, agent0/loss=48228.345, env_step=768000, len=512, n/ep=60, n/st=25600, rew=-14335.21]                            


Epoch #6: test_reward: -15161.250000 ± 15428.683140, best_reward: -8391.325000 ± 14370.458109 in #0


Epoch #7: 128001it [01:44, 1230.26it/s, agent0/loss=48865.107, env_step=896000, len=512, n/ep=40, n/st=25600, rew=-14932.12]                            


Epoch #7: test_reward: -14694.875000 ± 15008.346490, best_reward: -8391.325000 ± 14370.458109 in #0


Epoch #8: 128001it [01:42, 1245.32it/s, agent0/loss=54770.103, env_step=1024000, len=512, n/ep=60, n/st=25600, rew=-9351.00]                            


Epoch #8: test_reward: -9990.425000 ± 15246.658403, best_reward: -8391.325000 ± 14370.458109 in #0


Epoch #9: 128001it [01:43, 1238.54it/s, agent0/loss=73211.103, env_step=1152000, len=512, n/ep=40, n/st=25600, rew=-8473.56]                            


Best Saved
Epoch #9: test_reward: -7468.825000 ± 13908.091323, best_reward: -7468.825000 ± 13908.091323 in #9


Epoch #10: 128001it [01:43, 1239.74it/s, agent0/loss=93297.850, env_step=1280000, len=512, n/ep=60, n/st=25600, rew=-7697.27]                            


Epoch #10: test_reward: -10018.600000 ± 15230.041103, best_reward: -7468.825000 ± 13908.091323 in #9


Epoch #11: 128001it [01:42, 1243.16it/s, agent0/loss=100790.591, env_step=1408000, len=512, n/ep=40, n/st=25600, rew=-9058.52]                            


Best Saved
Epoch #11: test_reward: -6597.525000 ± 13342.297062, best_reward: -6597.525000 ± 13342.297062 in #11


Epoch #12: 128001it [01:44, 1224.99it/s, agent0/loss=99902.897, env_step=1536000, len=512, n/ep=60, n/st=25600, rew=-8708.06]                              


Best Saved
Epoch #12: test_reward: -4116.600000 ± 11023.274270, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #13: 128001it [01:43, 1239.16it/s, agent0/loss=98770.470, env_step=1664000, len=512, n/ep=40, n/st=25600, rew=-8925.05]                            


Epoch #13: test_reward: -9115.825000 ± 14882.971924, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #14: 128001it [01:42, 1245.07it/s, agent0/loss=100643.512, env_step=1792000, len=512, n/ep=60, n/st=25600, rew=-9865.93]                            


Epoch #14: test_reward: -9080.025000 ± 14905.850919, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #15: 128001it [01:43, 1236.56it/s, agent0/loss=102677.152, env_step=1920000, len=512, n/ep=40, n/st=25600, rew=-11008.80]                            


Epoch #15: test_reward: -6583.900000 ± 13349.777174, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #16: 128001it [01:43, 1232.45it/s, agent0/loss=99957.405, env_step=2048000, len=512, n/ep=60, n/st=25600, rew=-10268.92]                            


Epoch #16: test_reward: -9925.250000 ± 15290.099921, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #17: 128001it [01:42, 1245.34it/s, agent0/loss=94099.490, env_step=2176000, len=512, n/ep=40, n/st=25600, rew=-6820.86]                             


Epoch #17: test_reward: -10806.500000 ± 15594.160534, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #18: 128001it [01:44, 1224.28it/s, agent0/loss=90029.370, env_step=2304000, len=512, n/ep=60, n/st=25600, rew=-8190.72]                            


Epoch #18: test_reward: -9930.200000 ± 15287.259191, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #19: 128001it [01:43, 1235.69it/s, agent0/loss=87369.685, env_step=2432000, len=512, n/ep=40, n/st=25600, rew=-8734.50]                            


Epoch #19: test_reward: -10753.375000 ± 15631.905238, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #20: 128001it [01:42, 1248.96it/s, agent0/loss=90410.455, env_step=2560000, len=512, n/ep=60, n/st=25600, rew=-8937.88]                            


Epoch #20: test_reward: -7419.950000 ± 13934.369738, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #21: 128001it [01:43, 1235.05it/s, agent0/loss=93273.909, env_step=2688000, len=512, n/ep=40, n/st=25600, rew=-9776.54]                             


Epoch #21: test_reward: -4271.325000 ± 10966.592163, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #22: 128001it [01:42, 1249.90it/s, agent0/loss=87887.111, env_step=2816000, len=512, n/ep=60, n/st=25600, rew=-8228.48]                            


Epoch #22: test_reward: -7526.600000 ± 13876.952624, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #23: 128001it [01:42, 1243.97it/s, agent0/loss=88539.894, env_step=2944000, len=512, n/ep=40, n/st=25600, rew=-7692.79]                            


Epoch #23: test_reward: -4122.250000 ± 4147.884092, best_reward: -4116.600000 ± 11023.274270 in #12


Epoch #24: 128001it [01:42, 1247.89it/s, agent0/loss=86196.514, env_step=3072000, len=512, n/ep=60, n/st=25600, rew=-8010.40]                            


Best Saved
Epoch #24: test_reward: -4039.250000 ± 4053.693617, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #25: 128001it [01:42, 1251.74it/s, agent0/loss=74094.739, env_step=3200000, len=512, n/ep=40, n/st=25600, rew=-12440.83]                            


Epoch #25: test_reward: -8014.425000 ± 12016.878736, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #26: 128001it [01:42, 1247.22it/s, agent0/loss=64657.827, env_step=3328000, len=512, n/ep=60, n/st=25600, rew=-11430.61]                            


Epoch #26: test_reward: -7508.825000 ± 12092.668341, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #27: 128001it [01:43, 1241.48it/s, agent0/loss=57756.847, env_step=3456000, len=512, n/ep=40, n/st=25600, rew=-11306.59]                            


Epoch #27: test_reward: -10966.925000 ± 14726.345864, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #28: 128001it [01:44, 1230.16it/s, agent0/loss=54515.423, env_step=3584000, len=512, n/ep=60, n/st=25600, rew=-11567.10]                            


Epoch #28: test_reward: -9460.450000 ± 13879.140606, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #29: 128001it [01:43, 1234.91it/s, agent0/loss=54713.110, env_step=3712000, len=512, n/ep=40, n/st=25600, rew=-11738.89]                            


Epoch #29: test_reward: -7957.350000 ± 12801.410607, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #30: 128001it [01:43, 1239.23it/s, agent0/loss=55382.845, env_step=3840000, len=512, n/ep=60, n/st=25600, rew=-11303.22]                            


Epoch #30: test_reward: -6488.450000 ± 11422.577857, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #31: 128001it [01:42, 1246.20it/s, agent0/loss=55945.391, env_step=3968000, len=512, n/ep=40, n/st=25600, rew=-11004.33]                            


Epoch #31: test_reward: -7882.650000 ± 12822.667604, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #32: 128001it [01:42, 1251.22it/s, agent0/loss=56512.616, env_step=4096000, len=512, n/ep=60, n/st=25600, rew=-11284.32]                            


Epoch #32: test_reward: -8536.000000 ± 13430.759807, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #33: 128001it [01:43, 1239.59it/s, agent0/loss=57565.367, env_step=4224000, len=512, n/ep=40, n/st=25600, rew=-9080.31]                             


Epoch #33: test_reward: -9237.850000 ± 13963.171706, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #34: 128001it [01:44, 1229.36it/s, agent0/loss=58672.470, env_step=4352000, len=512, n/ep=60, n/st=25600, rew=-8687.58]                             


Epoch #34: test_reward: -9930.225000 ± 14445.821897, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #35: 128001it [01:42, 1244.70it/s, agent0/loss=59396.374, env_step=4480000, len=512, n/ep=40, n/st=25600, rew=-9736.73]                             


Epoch #35: test_reward: -12104.500000 ± 15573.609959, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #36: 128001it [01:43, 1238.17it/s, agent0/loss=59648.263, env_step=4608000, len=512, n/ep=60, n/st=25600, rew=-10942.01]                            


Epoch #36: test_reward: -8327.900000 ± 13507.422811, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #37: 128001it [01:43, 1241.26it/s, agent0/loss=59539.592, env_step=4736000, len=512, n/ep=40, n/st=25600, rew=-11217.23]                            


Epoch #37: test_reward: -8264.400000 ± 13531.832965, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #38: 128001it [01:42, 1243.44it/s, agent0/loss=59724.571, env_step=4864000, len=512, n/ep=60, n/st=25600, rew=-10805.51]                            


Epoch #38: test_reward: -10511.975000 ± 14940.177272, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #39: 128001it [01:43, 1239.29it/s, agent0/loss=60535.802, env_step=4992000, len=512, n/ep=40, n/st=25600, rew=-9475.94]                             


Epoch #39: test_reward: -11287.150000 ± 15292.295785, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #40: 128001it [01:43, 1239.06it/s, agent0/loss=61076.721, env_step=5120000, len=512, n/ep=60, n/st=25600, rew=-10370.52]                            


Epoch #40: test_reward: -6680.125000 ± 12308.013995, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #41: 128001it [01:43, 1240.69it/s, agent0/loss=61108.921, env_step=5248000, len=512, n/ep=40, n/st=25600, rew=-9250.80]                             


Epoch #41: test_reward: -8181.700000 ± 13565.433805, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #42: 128001it [01:42, 1248.55it/s, agent0/loss=61167.505, env_step=5376000, len=512, n/ep=60, n/st=25600, rew=-10647.38]                            


Epoch #42: test_reward: -7382.025000 ± 12992.603264, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #43: 128001it [01:43, 1233.33it/s, agent0/loss=61634.405, env_step=5504000, len=512, n/ep=40, n/st=25600, rew=-10260.06]                            


Epoch #43: test_reward: -8165.275000 ± 13572.954192, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #44: 128001it [01:42, 1244.60it/s, agent0/loss=62151.449, env_step=5632000, len=512, n/ep=60, n/st=25600, rew=-9195.07]                             


Epoch #44: test_reward: -9686.425000 ± 14561.562972, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #45: 128001it [01:41, 1257.64it/s, agent0/loss=62393.159, env_step=5760000, len=512, n/ep=40, n/st=25600, rew=-10160.74]                            


Epoch #45: test_reward: -7348.025000 ± 13004.905612, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #46: 128001it [01:42, 1253.98it/s, agent0/loss=62412.952, env_step=5888000, len=512, n/ep=60, n/st=25600, rew=-11560.61]                            


Epoch #46: test_reward: -10461.550000 ± 14966.437975, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #47: 128001it [01:43, 1237.79it/s, agent0/loss=61872.331, env_step=6016000, len=512, n/ep=40, n/st=25600, rew=-9601.90]                             


Epoch #47: test_reward: -9691.250000 ± 14559.288502, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #48: 128001it [01:43, 1237.88it/s, agent0/loss=61265.636, env_step=6144000, len=512, n/ep=60, n/st=25600, rew=-8686.51]                             


Epoch #48: test_reward: -9717.275000 ± 14546.058349, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #49: 128001it [01:43, 1234.33it/s, agent0/loss=61333.050, env_step=6272000, len=512, n/ep=40, n/st=25600, rew=-9096.40]                             


Epoch #49: test_reward: -6657.350000 ± 12315.068953, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #50: 128001it [01:42, 1243.99it/s, agent0/loss=61697.780, env_step=6400000, len=512, n/ep=60, n/st=25600, rew=-10580.08]                            


Epoch #50: test_reward: -11287.150000 ± 15291.806008, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #51: 128001it [01:43, 1237.37it/s, agent0/loss=61524.073, env_step=6528000, len=512, n/ep=40, n/st=25600, rew=-9214.23]                             


Epoch #51: test_reward: -5948.650000 ± 11550.578017, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #52: 128001it [01:42, 1248.41it/s, agent0/loss=60960.780, env_step=6656000, len=512, n/ep=60, n/st=25600, rew=-9946.42]                             


Epoch #52: test_reward: -5368.625000 ± 10656.489851, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #53: 128001it [01:43, 1234.00it/s, agent0/loss=60978.460, env_step=6784000, len=512, n/ep=40, n/st=25600, rew=-9292.30]                             


Epoch #53: test_reward: -9148.175000 ± 13998.441467, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #54: 128001it [01:42, 1247.34it/s, agent0/loss=61003.727, env_step=6912000, len=512, n/ep=60, n/st=25600, rew=-10804.88]                            


Epoch #54: test_reward: -9218.325000 ± 13969.921613, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #55: 128001it [01:43, 1233.24it/s, agent0/loss=60099.185, env_step=7040000, len=512, n/ep=40, n/st=25600, rew=-11157.75]                            


Epoch #55: test_reward: -12193.875000 ± 15522.694821, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #56: 128001it [01:43, 1240.69it/s, agent0/loss=59222.178, env_step=7168000, len=512, n/ep=60, n/st=25600, rew=-10519.41]                            


Epoch #56: test_reward: -11129.075000 ± 14662.397083, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #57: 128001it [01:43, 1232.96it/s, agent0/loss=58236.658, env_step=7296000, len=512, n/ep=40, n/st=25600, rew=-13462.75]                            


Epoch #57: test_reward: -8669.200000 ± 12646.052647, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #58: 128001it [01:43, 1234.12it/s, agent0/loss=55413.451, env_step=7424000, len=512, n/ep=60, n/st=25600, rew=-12221.33]                            


Epoch #58: test_reward: -11509.325000 ± 14541.750624, best_reward: -4039.250000 ± 4053.693617 in #24


Epoch #59: 128001it [01:43, 1237.48it/s, agent0/loss=90111.942, env_step=7552000, len=512, n/ep=40, n/st=25600, rew=-4803.30]                            


Best Saved
Epoch #59: test_reward: -3490.125000 ± 3666.943974, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #60: 128001it [01:42, 1245.87it/s, agent0/loss=161491.156, env_step=7680000, len=512, n/ep=60, n/st=25600, rew=-15098.96]                            


Epoch #60: test_reward: -15634.125000 ± 15713.694461, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #61: 128001it [01:43, 1238.96it/s, agent0/loss=181271.974, env_step=7808000, len=512, n/ep=40, n/st=25600, rew=-15118.19]                            


Epoch #61: test_reward: -15023.125000 ± 15166.132487, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #62: 128001it [01:42, 1247.27it/s, agent0/loss=103069.310, env_step=7936000, len=512, n/ep=60, n/st=25600, rew=-14938.08]                            


Epoch #62: test_reward: -15042.625000 ± 15182.147328, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #63: 128001it [01:42, 1243.66it/s, agent0/loss=37895.119, env_step=8064000, len=512, n/ep=40, n/st=25600, rew=-15200.25]                            


Epoch #63: test_reward: -15411.500000 ± 15510.291229, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #64: 128001it [01:43, 1231.65it/s, agent0/loss=35485.816, env_step=8192000, len=512, n/ep=60, n/st=25600, rew=-14932.67]                            


Epoch #64: test_reward: -15395.250000 ± 15478.242631, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #65: 128001it [01:43, 1236.78it/s, agent0/loss=33403.396, env_step=8320000, len=512, n/ep=40, n/st=25600, rew=-16231.31]                            


Epoch #65: test_reward: -16526.250000 ± 16526.764495, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #66: 128001it [01:44, 1230.71it/s, agent0/loss=27182.985, env_step=8448000, len=512, n/ep=60, n/st=25600, rew=-16605.33]                            


Epoch #66: test_reward: -16615.625000 ± 16615.732273, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #67: 128001it [01:43, 1237.79it/s, agent0/loss=16275.828, env_step=8576000, len=512, n/ep=40, n/st=25600, rew=-16596.94]                            


Epoch #67: test_reward: -16597.750000 ± 16598.119733, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #68: 128001it [01:43, 1234.45it/s, agent0/loss=6225.790, env_step=8704000, len=512, n/ep=60, n/st=25600, rew=-16623.21]                            


Epoch #68: test_reward: -16627.000000 ± 16627.059715, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #69: 128001it [01:43, 1235.68it/s, agent0/loss=5774.143, env_step=8832000, len=512, n/ep=40, n/st=25600, rew=-16599.38]                            


Epoch #69: test_reward: -16609.125000 ± 16609.242491, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #70: 128001it [01:43, 1235.43it/s, agent0/loss=11653.144, env_step=8960000, len=512, n/ep=60, n/st=25600, rew=-16609.12]                            


Epoch #70: test_reward: -16612.375000 ± 16612.548419, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #71: 128001it [01:43, 1234.77it/s, agent0/loss=12929.227, env_step=9088000, len=512, n/ep=40, n/st=25600, rew=-16609.12]                            


Epoch #71: test_reward: -16618.875000 ± 16618.965728, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #72: 128001it [01:44, 1225.14it/s, agent0/loss=6879.317, env_step=9216000, len=512, n/ep=60, n/st=25600, rew=-16610.21]                            


Epoch #72: test_reward: -16620.500000 ± 16620.674129, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #73: 128001it [01:43, 1240.69it/s, agent0/loss=1976.927, env_step=9344000, len=512, n/ep=40, n/st=25600, rew=-16613.19]                            


Epoch #73: test_reward: -16610.750000 ± 16610.882899, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #74: 128001it [01:43, 1239.36it/s, agent0/loss=1978.824, env_step=9472000, len=512, n/ep=60, n/st=25600, rew=-16595.04]                            


Epoch #74: test_reward: -16609.125000 ± 16609.267928, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #75: 128001it [01:43, 1239.92it/s, agent0/loss=1990.159, env_step=9600000, len=512, n/ep=40, n/st=25600, rew=-16625.38]                            


Epoch #75: test_reward: -16633.500000 ± 16633.522860, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #76: 128001it [01:43, 1237.87it/s, agent0/loss=1942.086, env_step=9728000, len=512, n/ep=60, n/st=25600, rew=-16612.38]                            


Epoch #76: test_reward: -16605.875000 ± 16606.094284, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #77: 128001it [01:42, 1246.12it/s, agent0/loss=1947.647, env_step=9856000, len=512, n/ep=40, n/st=25600, rew=-16622.12]                            


Epoch #77: test_reward: -16620.500000 ± 16620.553383, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #78: 128001it [01:43, 1233.80it/s, agent0/loss=1949.435, env_step=9984000, len=512, n/ep=60, n/st=25600, rew=-16637.83]                            


Epoch #78: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #79: 128001it [01:42, 1244.31it/s, agent0/loss=1950.412, env_step=10112000, len=512, n/ep=40, n/st=25600, rew=-16633.50]                            


Epoch #79: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #80: 128001it [01:43, 1238.94it/s, agent0/loss=1915.548, env_step=10240000, len=512, n/ep=60, n/st=25600, rew=-16628.08]                            


Epoch #80: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #81: 128001it [01:42, 1248.38it/s, agent0/loss=1882.473, env_step=10368000, len=512, n/ep=40, n/st=25600, rew=-16633.50]                            


Epoch #81: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #82: 128001it [01:42, 1243.59it/s, agent0/loss=1874.363, env_step=10496000, len=512, n/ep=60, n/st=25600, rew=-16625.92]                            


Epoch #82: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #83: 128001it [01:42, 1242.97it/s, agent0/loss=1893.806, env_step=10624000, len=512, n/ep=40, n/st=25600, rew=-16625.38]                            


Epoch #83: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #84: 128001it [01:43, 1239.17it/s, agent0/loss=1948.947, env_step=10752000, len=512, n/ep=60, n/st=25600, rew=-16632.42]                            


Epoch #84: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #85: 128001it [01:42, 1246.28it/s, agent0/loss=1940.244, env_step=10880000, len=512, n/ep=40, n/st=25600, rew=-16623.75]                            


Epoch #85: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #86: 128001it [01:44, 1223.16it/s, agent0/loss=1909.609, env_step=11008000, len=512, n/ep=60, n/st=25600, rew=-16630.25]                            


Epoch #86: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #87: 128001it [01:43, 1231.96it/s, agent0/loss=1934.811, env_step=11136000, len=512, n/ep=40, n/st=25600, rew=-16627.00]                            


Epoch #87: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #88: 128001it [01:44, 1221.06it/s, agent0/loss=1966.859, env_step=11264000, len=512, n/ep=60, n/st=25600, rew=-16623.75]                            


Epoch #88: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #89: 128001it [01:43, 1242.09it/s, agent0/loss=1969.273, env_step=11392000, len=512, n/ep=40, n/st=25600, rew=-16623.75]                            


Epoch #89: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #90: 128001it [01:42, 1243.24it/s, agent0/loss=1943.227, env_step=11520000, len=512, n/ep=60, n/st=25600, rew=-16628.08]                            


Epoch #90: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #91: 128001it [01:44, 1229.64it/s, agent0/loss=1921.227, env_step=11648000, len=512, n/ep=40, n/st=25600, rew=-16625.38]                            


Epoch #91: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #92: 128001it [01:43, 1242.09it/s, agent0/loss=1899.413, env_step=11776000, len=512, n/ep=60, n/st=25600, rew=-16625.92]                            


Epoch #92: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #93: 128001it [01:43, 1240.33it/s, agent0/loss=1922.576, env_step=11904000, len=512, n/ep=40, n/st=25600, rew=-16630.25]                            


Epoch #93: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #94: 128001it [01:43, 1232.56it/s, agent0/loss=1950.281, env_step=12032000, len=512, n/ep=60, n/st=25600, rew=-16625.92]                            


Epoch #94: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #95: 128001it [01:42, 1244.28it/s, agent0/loss=1896.697, env_step=12160000, len=512, n/ep=40, n/st=25600, rew=-16628.62]                            


Epoch #95: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #96: 128001it [01:43, 1236.94it/s, agent0/loss=1877.522, env_step=12288000, len=512, n/ep=60, n/st=25600, rew=-16633.50]                            


Epoch #96: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #97: 128001it [01:44, 1227.52it/s, agent0/loss=1877.065, env_step=12416000, len=512, n/ep=40, n/st=25600, rew=-16633.50]                            


Epoch #97: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #98: 128001it [01:43, 1239.57it/s, agent0/loss=1905.164, env_step=12544000, len=512, n/ep=60, n/st=25600, rew=-16631.33]                            


Epoch #98: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #99: 128001it [01:43, 1235.52it/s, agent0/loss=1941.957, env_step=12672000, len=512, n/ep=40, n/st=25600, rew=-16625.38]                            


Epoch #99: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #100: 128001it [01:43, 1235.10it/s, agent0/loss=1902.041, env_step=12800000, len=512, n/ep=60, n/st=25600, rew=-16623.75]                            


Epoch #100: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #101: 128001it [01:43, 1233.22it/s, agent0/loss=1881.166, env_step=12928000, len=512, n/ep=40, n/st=25600, rew=-16630.25]                            


Epoch #101: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #102: 128001it [01:42, 1251.91it/s, agent0/loss=1925.648, env_step=13056000, len=512, n/ep=60, n/st=25600, rew=-16628.08]                            


Epoch #102: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #103: 128001it [01:42, 1245.41it/s, agent0/loss=1939.404, env_step=13184000, len=512, n/ep=40, n/st=25600, rew=-16625.38]                            


Epoch #103: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #104: 128001it [01:43, 1241.47it/s, agent0/loss=1919.335, env_step=13312000, len=512, n/ep=60, n/st=25600, rew=-16623.75]                            


Epoch #104: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #105: 128001it [01:43, 1235.63it/s, agent0/loss=1912.327, env_step=13440000, len=512, n/ep=40, n/st=25600, rew=-16627.81]                            


Epoch #105: test_reward: -16636.750000 ± 16636.762063, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #106: 128001it [01:42, 1246.97it/s, agent0/loss=1886.248, env_step=13568000, len=512, n/ep=60, n/st=25600, rew=-16614.00]                            


Epoch #106: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #107: 128001it [01:42, 1243.94it/s, agent0/loss=1911.495, env_step=13696000, len=512, n/ep=40, n/st=25600, rew=-16635.12]                            


Epoch #107: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #108: 128001it [01:43, 1239.63it/s, agent0/loss=1928.934, env_step=13824000, len=512, n/ep=60, n/st=25600, rew=-16632.42]                            


Epoch #108: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #109: 128001it [01:43, 1237.33it/s, agent0/loss=1938.264, env_step=13952000, len=512, n/ep=40, n/st=25600, rew=-16620.50]                            


Epoch #109: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #110: 128001it [01:43, 1241.96it/s, agent0/loss=1937.496, env_step=14080000, len=512, n/ep=60, n/st=25600, rew=-16624.83]                            


Epoch #110: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #111: 128001it [01:42, 1252.53it/s, agent0/loss=1911.609, env_step=14208000, len=512, n/ep=40, n/st=25600, rew=-16623.75]                            


Epoch #111: test_reward: -16627.000000 ± 16627.040657, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #112: 128001it [01:42, 1248.77it/s, agent0/loss=1922.518, env_step=14336000, len=512, n/ep=60, n/st=25600, rew=-16625.92]                            


Epoch #112: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #113: 128001it [01:42, 1246.08it/s, agent0/loss=1945.583, env_step=14464000, len=512, n/ep=40, n/st=25600, rew=-16628.62]                            


Epoch #113: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #114: 128001it [01:43, 1242.70it/s, agent0/loss=1987.593, env_step=14592000, len=512, n/ep=60, n/st=25600, rew=-16632.42]                            


Epoch #114: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #115: 128001it [01:43, 1235.57it/s, agent0/loss=1985.755, env_step=14720000, len=512, n/ep=40, n/st=25600, rew=-16628.62]                            


Epoch #115: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #116: 128001it [01:42, 1242.75it/s, agent0/loss=1927.377, env_step=14848000, len=512, n/ep=60, n/st=25600, rew=-16624.27]                            


Epoch #116: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #117: 128001it [01:43, 1238.14it/s, agent0/loss=1887.799, env_step=14976000, len=512, n/ep=40, n/st=25600, rew=-16627.00]                            


Epoch #117: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #118: 128001it [01:42, 1243.34it/s, agent0/loss=1916.079, env_step=15104000, len=512, n/ep=60, n/st=25600, rew=-16624.83]                            


Epoch #118: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #119: 128001it [01:43, 1236.88it/s, agent0/loss=1925.595, env_step=15232000, len=512, n/ep=40, n/st=25600, rew=-16631.06]                            


Epoch #119: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #120: 128001it [01:43, 1235.66it/s, agent0/loss=1902.248, env_step=15360000, len=512, n/ep=60, n/st=25600, rew=-16625.92]                            


Epoch #120: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #121: 128001it [01:43, 1239.04it/s, agent0/loss=1918.734, env_step=15488000, len=512, n/ep=40, n/st=25600, rew=-16635.12]                            


Epoch #121: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #122: 128001it [01:43, 1242.22it/s, agent0/loss=1945.096, env_step=15616000, len=512, n/ep=60, n/st=25600, rew=-16628.08]                            


Epoch #122: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #123: 128001it [01:43, 1237.55it/s, agent0/loss=1958.090, env_step=15744000, len=512, n/ep=40, n/st=25600, rew=-16617.25]                            


Epoch #123: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #124: 128001it [01:42, 1245.44it/s, agent0/loss=1932.397, env_step=15872000, len=512, n/ep=60, n/st=25600, rew=-16611.83]                            


Epoch #124: test_reward: -16623.750000 ± 16623.797654, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #125: 128001it [01:43, 1238.46it/s, agent0/loss=1921.980, env_step=16000000, len=512, n/ep=40, n/st=25600, rew=-16614.00]                            


Epoch #125: test_reward: -16623.750000 ± 16623.797654, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #126: 128001it [01:43, 1240.77it/s, agent0/loss=1926.713, env_step=16128000, len=512, n/ep=60, n/st=25600, rew=-16618.33]                            


Epoch #126: test_reward: -16617.250000 ± 16617.307843, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #127: 128001it [01:43, 1242.27it/s, agent0/loss=1931.651, env_step=16256000, len=512, n/ep=40, n/st=25600, rew=-16633.50]                            


Epoch #127: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #128: 128001it [01:42, 1252.35it/s, agent0/loss=1948.726, env_step=16384000, len=512, n/ep=60, n/st=25600, rew=-16630.25]                            


Epoch #128: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #129: 128001it [01:42, 1251.02it/s, agent0/loss=1953.987, env_step=16512000, len=512, n/ep=40, n/st=25600, rew=-16612.38]                            


Epoch #129: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #130: 128001it [01:43, 1232.44it/s, agent0/loss=1946.831, env_step=16640000, len=512, n/ep=60, n/st=25600, rew=-16622.67]                            


Epoch #130: test_reward: -16627.000000 ± 16627.040657, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #131: 128001it [01:42, 1244.44it/s, agent0/loss=1894.828, env_step=16768000, len=512, n/ep=40, n/st=25600, rew=-16631.05]                            


Epoch #131: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #132: 128001it [01:42, 1245.90it/s, agent0/loss=1894.683, env_step=16896000, len=512, n/ep=60, n/st=25600, rew=-16627.00]                            


Epoch #132: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #133: 128001it [01:42, 1245.22it/s, agent0/loss=1939.460, env_step=17024000, len=512, n/ep=40, n/st=25600, rew=-16628.62]                            


Epoch #133: test_reward: -16623.750000 ± 16623.797654, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #134: 128001it [01:43, 1239.12it/s, agent0/loss=1930.081, env_step=17152000, len=512, n/ep=60, n/st=25600, rew=-16612.92]                            


Epoch #134: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #135: 128001it [01:43, 1239.02it/s, agent0/loss=1930.023, env_step=17280000, len=512, n/ep=40, n/st=25600, rew=-16609.12]                            


Epoch #135: test_reward: -16617.250000 ± 16617.307843, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #136: 128001it [01:42, 1244.96it/s, agent0/loss=1966.217, env_step=17408000, len=512, n/ep=60, n/st=25600, rew=-16613.46]                            


Epoch #136: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #137: 128001it [01:44, 1227.78it/s, agent0/loss=1955.094, env_step=17536000, len=512, n/ep=40, n/st=25600, rew=-16633.50]                            


Epoch #137: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #138: 128001it [01:43, 1241.52it/s, agent0/loss=1947.165, env_step=17664000, len=512, n/ep=60, n/st=25600, rew=-16614.00]                            


Epoch #138: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #139: 128001it [01:42, 1247.01it/s, agent0/loss=1968.743, env_step=17792000, len=512, n/ep=40, n/st=25600, rew=-16633.50]                            


Epoch #139: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #140: 128001it [01:42, 1245.09it/s, agent0/loss=1920.140, env_step=17920000, len=512, n/ep=60, n/st=25600, rew=-16628.08]                            


Epoch #140: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #141: 128001it [01:42, 1242.98it/s, agent0/loss=1889.911, env_step=18048000, len=512, n/ep=40, n/st=25600, rew=-16616.44]                            


Epoch #141: test_reward: -16630.250000 ± 16630.282392, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #142: 128001it [01:42, 1243.60it/s, agent0/loss=1929.400, env_step=18176000, len=512, n/ep=60, n/st=25600, rew=-16620.50]                            


Epoch #142: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #143: 128001it [01:43, 1233.26it/s, agent0/loss=1942.428, env_step=18304000, len=512, n/ep=40, n/st=25600, rew=-16630.25]                            


Epoch #143: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #144: 128001it [01:42, 1243.50it/s, agent0/loss=1925.943, env_step=18432000, len=512, n/ep=60, n/st=25600, rew=-16622.67]                            


Epoch #144: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #145: 128001it [01:43, 1233.68it/s, agent0/loss=1943.553, env_step=18560000, len=512, n/ep=40, n/st=25600, rew=-16630.25]                            


Epoch #145: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #146: 128001it [01:43, 1232.38it/s, agent0/loss=1951.016, env_step=18688000, len=512, n/ep=60, n/st=25600, rew=-16630.25]                            


Epoch #146: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #147: 128001it [01:43, 1240.03it/s, agent0/loss=1931.545, env_step=18816000, len=512, n/ep=40, n/st=25600, rew=-16615.62]                            


Epoch #147: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #148: 128001it [01:44, 1229.00it/s, agent0/loss=4456.936, env_step=18944000, len=512, n/ep=60, n/st=25600, rew=-16231.58]                            


Epoch #148: test_reward: -16640.000000 ± 16640.000000, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #149: 128001it [01:43, 1235.91it/s, agent0/loss=10622.475, env_step=19072000, len=512, n/ep=40, n/st=25600, rew=-16612.38]                            


Epoch #149: test_reward: -16612.375000 ± 16612.510270, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #150: 128001it [01:42, 1243.20it/s, agent0/loss=14032.869, env_step=19200000, len=512, n/ep=60, n/st=25600, rew=-16609.12]                            


Epoch #150: test_reward: -16623.750000 ± 16623.854838, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #151: 128001it [01:42, 1245.71it/s, agent0/loss=8524.464, env_step=19328000, len=512, n/ep=40, n/st=25600, rew=-14267.36]                             


Epoch #151: test_reward: -4115.850000 ± 11023.740238, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #152: 128001it [01:42, 1254.65it/s, agent0/loss=26816.855, env_step=19456000, len=512, n/ep=60, n/st=25600, rew=-8418.77]                            


Epoch #152: test_reward: -9129.150000 ± 14874.174544, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #153: 128001it [01:43, 1238.39it/s, agent0/loss=67195.940, env_step=19584000, len=512, n/ep=40, n/st=25600, rew=-10239.49]                            


Epoch #153: test_reward: -8306.350000 ± 14418.678517, best_reward: -3490.125000 ± 3666.943974 in #59


Epoch #154: 128001it [01:43, 1234.24it/s, agent0/loss=83777.254, env_step=19712000, len=512, n/ep=60, n/st=25600, rew=-9628.08]                             


Best Saved
Epoch #154: test_reward: -3287.125000 ± 9998.145916, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #155: 128001it [01:43, 1234.06it/s, agent0/loss=82616.105, env_step=19840000, len=512, n/ep=40, n/st=25600, rew=-8177.70]                            


Epoch #155: test_reward: -7473.325000 ± 13905.125293, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #156: 128001it [01:44, 1229.08it/s, agent0/loss=82637.495, env_step=19968000, len=512, n/ep=60, n/st=25600, rew=-7832.62]                            


Epoch #156: test_reward: -9169.100000 ± 14849.861405, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #157: 128001it [01:42, 1249.01it/s, agent0/loss=82761.438, env_step=20096000, len=512, n/ep=40, n/st=25600, rew=-8695.71]                            


Epoch #157: test_reward: -6633.100000 ± 13323.551533, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #158: 128001it [01:43, 1242.64it/s, agent0/loss=80968.953, env_step=20224000, len=512, n/ep=60, n/st=25600, rew=-7663.11]                            


Epoch #158: test_reward: -4956.375000 ± 11898.681590, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #159: 128001it [01:43, 1239.01it/s, agent0/loss=79241.305, env_step=20352000, len=512, n/ep=40, n/st=25600, rew=-8033.43]                            


Epoch #159: test_reward: -6635.050000 ± 13322.637541, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #160: 128001it [01:43, 1231.90it/s, agent0/loss=77341.359, env_step=20480000, len=512, n/ep=60, n/st=25600, rew=-8662.62]                            


Epoch #160: test_reward: -12459.575000 ± 16127.569592, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #161: 128001it [01:43, 1233.66it/s, agent0/loss=75168.488, env_step=20608000, len=512, n/ep=40, n/st=25600, rew=-8623.85]                            


Epoch #161: test_reward: -9136.375000 ± 14869.679559, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #162: 128001it [01:43, 1232.75it/s, agent0/loss=79446.075, env_step=20736000, len=512, n/ep=60, n/st=25600, rew=-8747.28]                            


Epoch #162: test_reward: -9965.950000 ± 15262.743664, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #163: 128001it [01:42, 1243.86it/s, agent0/loss=82370.354, env_step=20864000, len=512, n/ep=40, n/st=25600, rew=-8970.65]                            


Epoch #163: test_reward: -8266.900000 ± 14441.981953, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #164: 128001it [01:42, 1248.62it/s, agent0/loss=78404.005, env_step=20992000, len=512, n/ep=60, n/st=25600, rew=-9250.70]                            


Epoch #164: test_reward: -12464.125000 ± 16124.132191, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #165: 128001it [01:43, 1241.28it/s, agent0/loss=85179.138, env_step=21120000, len=512, n/ep=40, n/st=25600, rew=-9856.05]                             


Epoch #165: test_reward: -6137.250000 ± 12508.720550, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #166: 128001it [01:43, 1237.52it/s, agent0/loss=84084.692, env_step=21248000, len=512, n/ep=60, n/st=25600, rew=-9173.47]                             


Epoch #166: test_reward: -10200.050000 ± 15115.127512, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #167: 128001it [01:42, 1246.91it/s, agent0/loss=71480.847, env_step=21376000, len=512, n/ep=40, n/st=25600, rew=-9454.09]                            


Epoch #167: test_reward: -10196.600000 ± 15117.340111, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #168: 128001it [01:41, 1255.14it/s, agent0/loss=67715.918, env_step=21504000, len=512, n/ep=60, n/st=25600, rew=-9411.77]                            


Epoch #168: test_reward: -10983.350000 ± 15475.487963, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #169: 128001it [01:43, 1240.26it/s, agent0/loss=67074.409, env_step=21632000, len=512, n/ep=40, n/st=25600, rew=-9020.20]                             


Epoch #169: test_reward: -10170.725000 ± 15132.922662, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #170: 128001it [01:42, 1252.06it/s, agent0/loss=67126.282, env_step=21760000, len=512, n/ep=60, n/st=25600, rew=-9793.03]                             


Epoch #170: test_reward: -8542.400000 ± 14287.140891, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #171: 128001it [01:42, 1254.47it/s, agent0/loss=67150.418, env_step=21888000, len=512, n/ep=40, n/st=25600, rew=-8955.08]                             


Epoch #171: test_reward: -8539.025000 ± 14288.872295, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #172: 128001it [01:43, 1238.00it/s, agent0/loss=67078.882, env_step=22016000, len=512, n/ep=60, n/st=25600, rew=-10653.38]                            


Epoch #172: test_reward: -6922.300000 ± 13184.937552, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #173: 128001it [01:43, 1238.61it/s, agent0/loss=66972.575, env_step=22144000, len=512, n/ep=40, n/st=25600, rew=-9138.66]                             


Epoch #173: test_reward: -9325.750000 ± 14756.231626, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #174: 128001it [01:42, 1251.55it/s, agent0/loss=66867.249, env_step=22272000, len=512, n/ep=60, n/st=25600, rew=-8852.75]                             


Epoch #174: test_reward: -9350.075000 ± 14742.270253, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #175: 128001it [01:43, 1238.91it/s, agent0/loss=67089.918, env_step=22400000, len=512, n/ep=40, n/st=25600, rew=-8867.36]                             


Epoch #175: test_reward: -10175.550000 ± 15129.799376, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #176: 128001it [01:44, 1226.57it/s, agent0/loss=67413.903, env_step=22528000, len=512, n/ep=60, n/st=25600, rew=-9380.78]                            


Epoch #176: test_reward: -8522.750000 ± 14297.641326, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #177: 128001it [01:42, 1249.75it/s, agent0/loss=67403.084, env_step=22656000, len=512, n/ep=40, n/st=25600, rew=-8362.80]                            


Epoch #177: test_reward: -8490.550000 ± 14315.158169, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #178: 128001it [01:43, 1235.39it/s, agent0/loss=67031.405, env_step=22784000, len=512, n/ep=60, n/st=25600, rew=-8456.03]                            


Epoch #178: test_reward: -7499.900000 ± 13890.746940, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #179: 128001it [01:42, 1243.59it/s, agent0/loss=67160.419, env_step=22912000, len=512, n/ep=40, n/st=25600, rew=-8545.56]                            


Epoch #179: test_reward: -7498.900000 ± 13891.285685, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #180: 128001it [01:42, 1245.66it/s, agent0/loss=68048.979, env_step=23040000, len=512, n/ep=60, n/st=25600, rew=-8622.88]                             


Epoch #180: test_reward: -8324.575000 ± 14408.030070, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #181: 128001it [01:43, 1242.41it/s, agent0/loss=68871.054, env_step=23168000, len=512, n/ep=40, n/st=25600, rew=-8688.34]                            


Epoch #181: test_reward: -9156.725000 ± 14857.094100, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #182: 128001it [01:43, 1241.45it/s, agent0/loss=69153.317, env_step=23296000, len=512, n/ep=60, n/st=25600, rew=-8626.20]                             


Epoch #182: test_reward: -11654.125000 ± 15869.063823, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #183: 128001it [01:43, 1240.30it/s, agent0/loss=69066.513, env_step=23424000, len=512, n/ep=40, n/st=25600, rew=-9381.41]                            


Epoch #183: test_reward: -9157.750000 ± 14856.463553, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #184: 128001it [01:42, 1253.27it/s, agent0/loss=69055.966, env_step=23552000, len=512, n/ep=60, n/st=25600, rew=-7703.81]                             


Epoch #184: test_reward: -8332.300000 ± 14403.580583, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #185: 128001it [01:41, 1256.42it/s, agent0/loss=69027.878, env_step=23680000, len=512, n/ep=40, n/st=25600, rew=-8706.33]                             


Epoch #185: test_reward: -9990.250000 ± 15246.731315, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #186: 128001it [01:43, 1239.48it/s, agent0/loss=68976.926, env_step=23808000, len=512, n/ep=60, n/st=25600, rew=-9712.85]                            


Epoch #186: test_reward: -9988.725000 ± 15247.727390, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #187: 128001it [01:43, 1239.18it/s, agent0/loss=68925.642, env_step=23936000, len=512, n/ep=40, n/st=25600, rew=-9364.66]                            


Epoch #187: test_reward: -8326.300000 ± 14407.037355, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #188: 128001it [01:42, 1253.87it/s, agent0/loss=68855.567, env_step=24064000, len=512, n/ep=60, n/st=25600, rew=-7801.93]                            


Epoch #188: test_reward: -6666.875000 ± 13306.582371, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #189: 128001it [01:42, 1244.94it/s, agent0/loss=68818.603, env_step=24192000, len=512, n/ep=40, n/st=25600, rew=-10022.52]                            


Epoch #189: test_reward: -9158.225000 ± 14856.172536, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #190: 128001it [01:43, 1236.67it/s, agent0/loss=68761.971, env_step=24320000, len=512, n/ep=60, n/st=25600, rew=-10235.85]                            


Epoch #190: test_reward: -9987.175000 ± 15248.739526, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #191: 128001it [01:42, 1246.05it/s, agent0/loss=68628.800, env_step=24448000, len=512, n/ep=40, n/st=25600, rew=-8917.61]                            


Epoch #191: test_reward: -6665.100000 ± 13307.467007, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #192: 128001it [01:42, 1244.53it/s, agent0/loss=68204.200, env_step=24576000, len=512, n/ep=60, n/st=25600, rew=-8404.48]                             


Epoch #192: test_reward: -9990.200000 ± 15246.763875, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #193: 128001it [01:42, 1244.41it/s, agent0/loss=67223.837, env_step=24704000, len=512, n/ep=40, n/st=25600, rew=-9617.46]                            


Epoch #193: test_reward: -7493.750000 ± 13894.052160, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #194: 128001it [01:43, 1239.65it/s, agent0/loss=65305.497, env_step=24832000, len=512, n/ep=60, n/st=25600, rew=-9268.99]                            


Epoch #194: test_reward: -8326.200000 ± 14407.094714, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #195: 128001it [01:43, 1237.59it/s, agent0/loss=67403.580, env_step=24960000, len=512, n/ep=40, n/st=25600, rew=-7983.50]                            


Epoch #195: test_reward: -9988.250000 ± 15248.036854, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #196: 128001it [01:44, 1219.43it/s, agent0/loss=68688.926, env_step=25088000, len=512, n/ep=60, n/st=25600, rew=-9825.27]                             


Epoch #196: test_reward: -8327.675000 ± 14406.245263, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #197: 128001it [01:43, 1233.47it/s, agent0/loss=64253.830, env_step=25216000, len=512, n/ep=40, n/st=25600, rew=-8903.89]                            


Epoch #197: test_reward: -6663.650000 ± 13308.189878, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #198: 128001it [01:43, 1235.16it/s, agent0/loss=74430.002, env_step=25344000, len=512, n/ep=60, n/st=25600, rew=-9085.37]                            


Epoch #198: test_reward: -6662.300000 ± 13308.863053, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #199: 128001it [01:43, 1233.61it/s, agent0/loss=78769.312, env_step=25472000, len=512, n/ep=40, n/st=25600, rew=-10400.70]                            


Epoch #199: test_reward: -8329.325000 ± 14405.295041, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #200: 128001it [01:43, 1235.08it/s, agent0/loss=70260.420, env_step=25600000, len=512, n/ep=60, n/st=25600, rew=-9540.02]                            


Epoch #200: test_reward: -9154.900000 ± 14858.214383, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #201: 128001it [01:43, 1236.43it/s, agent0/loss=69947.233, env_step=25728000, len=512, n/ep=40, n/st=25600, rew=-8169.80]                            


Epoch #201: test_reward: -8329.050000 ± 14405.452879, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #202: 128001it [01:43, 1234.08it/s, agent0/loss=69999.053, env_step=25856000, len=512, n/ep=60, n/st=25600, rew=-9322.12]                            


Epoch #202: test_reward: -8329.075000 ± 14405.438521, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #203: 128001it [01:43, 1239.69it/s, agent0/loss=70112.554, env_step=25984000, len=512, n/ep=40, n/st=25600, rew=-6616.00]                            


Epoch #203: test_reward: -6670.125000 ± 13304.961470, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #204: 128001it [01:42, 1247.62it/s, agent0/loss=70123.202, env_step=26112000, len=512, n/ep=60, n/st=25600, rew=-8189.17]                            


Epoch #204: test_reward: -5830.150000 ± 12642.485920, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #205: 128001it [01:44, 1230.46it/s, agent0/loss=70165.242, env_step=26240000, len=512, n/ep=40, n/st=25600, rew=-9812.08]                             


Epoch #205: test_reward: -9159.600000 ± 14855.327393, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #206: 128001it [01:43, 1241.36it/s, agent0/loss=70263.465, env_step=26368000, len=512, n/ep=60, n/st=25600, rew=-8380.43]                             


Epoch #206: test_reward: -5836.125000 ± 12639.742961, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #207: 128001it [01:44, 1230.16it/s, agent0/loss=70288.851, env_step=26496000, len=512, n/ep=40, n/st=25600, rew=-7905.24]                             


Epoch #207: test_reward: -5836.150000 ± 12639.731509, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #208: 128001it [01:44, 1227.23it/s, agent0/loss=70290.661, env_step=26624000, len=512, n/ep=60, n/st=25600, rew=-8777.17]                            


Epoch #208: test_reward: -5830.150000 ± 12642.485880, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #209: 128001it [01:43, 1241.47it/s, agent0/loss=70335.586, env_step=26752000, len=512, n/ep=40, n/st=25600, rew=-9447.41]                            


Epoch #209: test_reward: -9162.300000 ± 14853.667187, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #210: 128001it [01:43, 1234.19it/s, agent0/loss=70444.214, env_step=26880000, len=512, n/ep=60, n/st=25600, rew=-10168.33]                            


Epoch #210: test_reward: -9159.725000 ± 14855.250777, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #211: 128001it [01:43, 1235.08it/s, agent0/loss=70276.113, env_step=27008000, len=512, n/ep=40, n/st=25600, rew=-8643.64]                             


Epoch #211: test_reward: -7500.200000 ± 13890.586255, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #212: 128001it [01:43, 1238.94it/s, agent0/loss=69977.089, env_step=27136000, len=512, n/ep=60, n/st=25600, rew=-9855.76]                             


Epoch #212: test_reward: -11652.775000 ± 15870.052893, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #213: 128001it [01:43, 1231.33it/s, agent0/loss=69923.928, env_step=27264000, len=512, n/ep=40, n/st=25600, rew=-9402.08]                             


Epoch #213: test_reward: -9158.075000 ± 14856.264381, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #214: 128001it [01:44, 1227.16it/s, agent0/loss=70114.536, env_step=27392000, len=512, n/ep=60, n/st=25600, rew=-9255.88]                            


Epoch #214: test_reward: -8325.825000 ± 14407.310094, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #215: 128001it [01:43, 1240.26it/s, agent0/loss=70337.931, env_step=27520000, len=512, n/ep=40, n/st=25600, rew=-10410.48]                            


Epoch #215: test_reward: -5834.600000 ± 12640.443362, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #216: 128001it [01:43, 1240.62it/s, agent0/loss=70368.846, env_step=27648000, len=512, n/ep=60, n/st=25600, rew=-9770.52]                            


Epoch #216: test_reward: -7501.950000 ± 13889.645501, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #217: 128001it [01:43, 1236.43it/s, agent0/loss=70385.594, env_step=27776000, len=512, n/ep=40, n/st=25600, rew=-11704.60]                            


Epoch #217: test_reward: -6665.250000 ± 13307.392432, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #218: 128001it [01:44, 1229.99it/s, agent0/loss=70376.727, env_step=27904000, len=512, n/ep=60, n/st=25600, rew=-9545.48]                             


Epoch #218: test_reward: -12484.575000 ± 16108.074584, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #219: 128001it [01:44, 1229.91it/s, agent0/loss=70196.673, env_step=28032000, len=512, n/ep=40, n/st=25600, rew=-9349.06]                            


Epoch #219: test_reward: -9157.550000 ± 14856.586093, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #220: 128001it [01:42, 1246.52it/s, agent0/loss=70273.084, env_step=28160000, len=512, n/ep=60, n/st=25600, rew=-8975.13]                             


Epoch #220: test_reward: -8327.675000 ± 14406.245221, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #221: 128001it [01:43, 1231.83it/s, agent0/loss=70528.870, env_step=28288000, len=512, n/ep=40, n/st=25600, rew=-9278.41]                            


Epoch #221: test_reward: -9156.475000 ± 14857.247151, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #222: 128001it [01:43, 1233.76it/s, agent0/loss=70554.174, env_step=28416000, len=512, n/ep=60, n/st=25600, rew=-8195.76]                            


Epoch #222: test_reward: -9161.175000 ± 14854.359491, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #223: 128001it [01:43, 1234.24it/s, agent0/loss=70530.230, env_step=28544000, len=512, n/ep=40, n/st=25600, rew=-8373.26]                            


Epoch #223: test_reward: -5831.650000 ± 12641.797605, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #224: 128001it [01:43, 1234.54it/s, agent0/loss=70529.928, env_step=28672000, len=512, n/ep=60, n/st=25600, rew=-8974.23]                            


Epoch #224: test_reward: -7495.700000 ± 13893.005287, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #225: 128001it [01:43, 1237.23it/s, agent0/loss=70543.021, env_step=28800000, len=512, n/ep=40, n/st=25600, rew=-9369.24]                             


Epoch #225: test_reward: -8324.725000 ± 14407.944038, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #226: 128001it [01:42, 1246.76it/s, agent0/loss=70339.995, env_step=28928000, len=512, n/ep=60, n/st=25600, rew=-8760.98]                             


Epoch #226: test_reward: -6665.325000 ± 13307.355215, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #227: 128001it [01:43, 1233.19it/s, agent0/loss=70092.735, env_step=29056000, len=512, n/ep=40, n/st=25600, rew=-11056.34]                            


Epoch #227: test_reward: -9156.700000 ± 14857.109401, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #228: 128001it [01:43, 1232.46it/s, agent0/loss=70262.835, env_step=29184000, len=512, n/ep=60, n/st=25600, rew=-10098.52]                            


Epoch #228: test_reward: -10822.050000 ± 15583.326997, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #229: 128001it [01:42, 1245.41it/s, agent0/loss=70478.245, env_step=29312000, len=512, n/ep=40, n/st=25600, rew=-9692.80]                            


Epoch #229: test_reward: -14147.025000 ± 16449.139393, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #230: 128001it [01:43, 1234.02it/s, agent0/loss=70488.383, env_step=29440000, len=512, n/ep=60, n/st=25600, rew=-8994.94]                            


Epoch #230: test_reward: -6664.950000 ± 13307.541413, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #231: 128001it [01:42, 1244.37it/s, agent0/loss=70627.748, env_step=29568000, len=512, n/ep=40, n/st=25600, rew=-9969.65]                             


Epoch #231: test_reward: -13314.950000 ± 16301.399963, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #232: 128001it [01:43, 1236.00it/s, agent0/loss=70723.975, env_step=29696000, len=512, n/ep=60, n/st=25600, rew=-9063.80]                            


Epoch #232: test_reward: -9157.775000 ± 14856.448298, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #233: 128001it [01:43, 1238.94it/s, agent0/loss=70669.391, env_step=29824000, len=512, n/ep=40, n/st=25600, rew=-9059.09]                             


Epoch #233: test_reward: -11654.150000 ± 15869.045536, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #234: 128001it [01:42, 1246.91it/s, agent0/loss=70698.021, env_step=29952000, len=512, n/ep=60, n/st=25600, rew=-8310.80]                             


Epoch #234: test_reward: -7494.275000 ± 13893.771124, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #235: 128001it [01:44, 1227.70it/s, agent0/loss=70733.930, env_step=30080000, len=512, n/ep=40, n/st=25600, rew=-10130.08]                            


Epoch #235: test_reward: -8327.350000 ± 14406.431934, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #236: 128001it [01:43, 1234.89it/s, agent0/loss=70744.155, env_step=30208000, len=512, n/ep=60, n/st=25600, rew=-10491.11]                            


Epoch #236: test_reward: -9991.775000 ± 15245.735032, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #237: 128001it [01:42, 1243.54it/s, agent0/loss=70854.342, env_step=30336000, len=512, n/ep=40, n/st=25600, rew=-9448.67]                             


Epoch #237: test_reward: -5833.400000 ± 12640.994976, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #238: 128001it [01:43, 1242.14it/s, agent0/loss=70863.266, env_step=30464000, len=512, n/ep=60, n/st=25600, rew=-9107.12]                            


Epoch #238: test_reward: -7497.500000 ± 13892.038565, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #239: 128001it [01:43, 1233.28it/s, agent0/loss=70784.372, env_step=30592000, len=512, n/ep=40, n/st=25600, rew=-8570.55]                            


Epoch #239: test_reward: -9158.200000 ± 14856.187842, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #240: 128001it [01:43, 1237.12it/s, agent0/loss=70699.329, env_step=30720000, len=512, n/ep=60, n/st=25600, rew=-10751.74]                            


Epoch #240: test_reward: -6665.050000 ± 13307.491796, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #241: 128001it [01:43, 1236.08it/s, agent0/loss=70769.807, env_step=30848000, len=512, n/ep=40, n/st=25600, rew=-6293.99]                             


Epoch #241: test_reward: -10823.850000 ± 15582.080870, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #242: 128001it [01:43, 1235.91it/s, agent0/loss=70999.405, env_step=30976000, len=512, n/ep=60, n/st=25600, rew=-8770.69]                            


Epoch #242: test_reward: -8327.425000 ± 14406.388770, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #243: 128001it [01:43, 1234.63it/s, agent0/loss=71003.020, env_step=31104000, len=512, n/ep=40, n/st=25600, rew=-9543.77]                             


Epoch #243: test_reward: -10822.025000 ± 15583.344281, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #244: 128001it [01:42, 1243.03it/s, agent0/loss=70950.499, env_step=31232000, len=512, n/ep=60, n/st=25600, rew=-9366.17]                            


Epoch #244: test_reward: -7497.075000 ± 13892.266178, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #245: 128001it [01:43, 1236.86it/s, agent0/loss=70972.135, env_step=31360000, len=512, n/ep=40, n/st=25600, rew=-10076.85]                            


Epoch #245: test_reward: -10822.225000 ± 15583.206150, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #246: 128001it [01:43, 1233.81it/s, agent0/loss=71065.130, env_step=31488000, len=512, n/ep=60, n/st=25600, rew=-8978.39]                            


Epoch #246: test_reward: -5836.125000 ± 12639.742949, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #247: 128001it [01:42, 1242.98it/s, agent0/loss=71075.629, env_step=31616000, len=512, n/ep=40, n/st=25600, rew=-10142.98]                            


Epoch #247: test_reward: -9161.225000 ± 14854.328867, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #248: 128001it [01:43, 1233.88it/s, agent0/loss=70939.018, env_step=31744000, len=512, n/ep=60, n/st=25600, rew=-9356.88]                             


Epoch #248: test_reward: -10823.675000 ± 15582.201763, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #249: 128001it [01:43, 1240.30it/s, agent0/loss=70634.446, env_step=31872000, len=512, n/ep=40, n/st=25600, rew=-10445.65]                            


Epoch #249: test_reward: -7494.150000 ± 13893.837998, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #250: 128001it [01:43, 1235.13it/s, agent0/loss=70648.696, env_step=32000000, len=512, n/ep=60, n/st=25600, rew=-9812.89]                            


Epoch #250: test_reward: -6662.025000 ± 13308.999499, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #251: 128001it [01:42, 1246.16it/s, agent0/loss=71000.148, env_step=32128000, len=512, n/ep=40, n/st=25600, rew=-10516.44]                            


Epoch #251: test_reward: -5831.725000 ± 12641.763376, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #252: 128001it [01:43, 1236.75it/s, agent0/loss=71167.184, env_step=32256000, len=512, n/ep=60, n/st=25600, rew=-9508.42]                            


Epoch #252: test_reward: -8323.150000 ± 14408.850495, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #253: 128001it [01:43, 1234.45it/s, agent0/loss=71260.255, env_step=32384000, len=512, n/ep=40, n/st=25600, rew=-8328.60]                            


Epoch #253: test_reward: -9156.375000 ± 14857.308460, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #254: 128001it [01:43, 1233.36it/s, agent0/loss=71246.945, env_step=32512000, len=512, n/ep=60, n/st=25600, rew=-8678.24]                             


Epoch #254: test_reward: -7491.100000 ± 13895.475992, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #255: 128001it [01:43, 1240.33it/s, agent0/loss=71194.147, env_step=32640000, len=512, n/ep=40, n/st=25600, rew=-10987.98]                            


Epoch #255: test_reward: -11654.225000 ± 15868.990747, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #256: 128001it [01:44, 1227.30it/s, agent0/loss=71097.353, env_step=32768000, len=512, n/ep=60, n/st=25600, rew=-10219.76]                            


Epoch #256: test_reward: -9987.125000 ± 15248.772067, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #257: 128001it [01:43, 1237.90it/s, agent0/loss=71050.329, env_step=32896000, len=512, n/ep=40, n/st=25600, rew=-8045.39]                            


Epoch #257: test_reward: -5834.375000 ± 12640.546218, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #258: 128001it [01:43, 1232.99it/s, agent0/loss=71136.196, env_step=33024000, len=512, n/ep=60, n/st=25600, rew=-9106.56]                            


Epoch #258: test_reward: -8327.600000 ± 14406.288264, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #259: 128001it [01:43, 1234.70it/s, agent0/loss=71164.945, env_step=33152000, len=512, n/ep=40, n/st=25600, rew=-8767.35]                             


Epoch #259: test_reward: -3340.500000 ± 9979.863792, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #260: 128001it [01:44, 1225.11it/s, agent0/loss=71246.353, env_step=33280000, len=512, n/ep=60, n/st=25600, rew=-8068.20]                            


Epoch #260: test_reward: -6665.050000 ± 13307.491704, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #261: 128001it [01:43, 1236.06it/s, agent0/loss=71372.798, env_step=33408000, len=512, n/ep=40, n/st=25600, rew=-10337.59]                            


Epoch #261: test_reward: -10823.850000 ± 15582.080871, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #262: 128001it [01:43, 1231.19it/s, agent0/loss=71349.692, env_step=33536000, len=512, n/ep=60, n/st=25600, rew=-7568.60]                             


Epoch #262: test_reward: -8326.275000 ± 14407.051699, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #263: 128001it [01:43, 1240.61it/s, agent0/loss=71306.261, env_step=33664000, len=512, n/ep=40, n/st=25600, rew=-9500.14]                            


Epoch #263: test_reward: -8326.050000 ± 14407.180774, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #264: 128001it [01:42, 1251.27it/s, agent0/loss=71319.034, env_step=33792000, len=512, n/ep=60, n/st=25600, rew=-8262.36]                             


Epoch #264: test_reward: -9162.625000 ± 14853.467945, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #265: 128001it [01:43, 1234.09it/s, agent0/loss=71342.968, env_step=33920000, len=512, n/ep=40, n/st=25600, rew=-8278.10]                             


Epoch #265: test_reward: -7494.225000 ± 13893.797880, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #266: 128001it [01:43, 1234.80it/s, agent0/loss=71394.982, env_step=34048000, len=512, n/ep=60, n/st=25600, rew=-9557.41]                            


Epoch #266: test_reward: -5834.625000 ± 12640.432035, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #267: 128001it [01:43, 1234.24it/s, agent0/loss=71403.750, env_step=34176000, len=512, n/ep=40, n/st=25600, rew=-9076.04]                            


Epoch #267: test_reward: -11651.075000 ± 15871.297378, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #268: 128001it [01:44, 1229.80it/s, agent0/loss=71459.120, env_step=34304000, len=512, n/ep=60, n/st=25600, rew=-10163.70]                            


Epoch #268: test_reward: -8329.150000 ± 14405.395504, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #269: 128001it [01:43, 1235.11it/s, agent0/loss=71508.433, env_step=34432000, len=512, n/ep=40, n/st=25600, rew=-6564.51]                            


Epoch #269: test_reward: -9990.275000 ± 15246.715033, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #270: 128001it [01:43, 1237.45it/s, agent0/loss=71524.938, env_step=34560000, len=512, n/ep=60, n/st=25600, rew=-7930.68]                             


Epoch #270: test_reward: -8332.125000 ± 14403.681096, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #271: 128001it [01:44, 1227.91it/s, agent0/loss=71495.141, env_step=34688000, len=512, n/ep=40, n/st=25600, rew=-8675.99]                            


Epoch #271: test_reward: -10822.350000 ± 15583.119833, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #272: 128001it [01:44, 1226.84it/s, agent0/loss=71530.237, env_step=34816000, len=512, n/ep=60, n/st=25600, rew=-7619.69]                             


Epoch #272: test_reward: -6662.100000 ± 13308.962313, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #273: 128001it [01:43, 1235.39it/s, agent0/loss=71557.889, env_step=34944000, len=512, n/ep=40, n/st=25600, rew=-9686.30]                            


Epoch #273: test_reward: -7496.950000 ± 13892.333073, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #274: 128001it [01:43, 1233.89it/s, agent0/loss=71436.715, env_step=35072000, len=512, n/ep=60, n/st=25600, rew=-7566.02]                             


Epoch #274: test_reward: -9162.425000 ± 14853.590591, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #275: 128001it [01:43, 1236.04it/s, agent0/loss=71426.766, env_step=35200000, len=512, n/ep=40, n/st=25600, rew=-7398.12]                            


Epoch #275: test_reward: -11649.525000 ± 15872.432085, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #276: 128001it [01:43, 1237.47it/s, agent0/loss=71521.228, env_step=35328000, len=512, n/ep=60, n/st=25600, rew=-8331.69]                             


Epoch #276: test_reward: -9156.550000 ± 14857.201225, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #277: 128001it [01:42, 1244.01it/s, agent0/loss=71585.144, env_step=35456000, len=512, n/ep=40, n/st=25600, rew=-9236.79]                            


Epoch #277: test_reward: -9988.575000 ± 15247.825050, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #278: 128001it [01:44, 1230.28it/s, agent0/loss=71559.264, env_step=35584000, len=512, n/ep=60, n/st=25600, rew=-9986.35]                             


Epoch #278: test_reward: -9159.775000 ± 14855.220157, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #279: 128001it [01:43, 1234.99it/s, agent0/loss=71518.548, env_step=35712000, len=512, n/ep=40, n/st=25600, rew=-9941.24]                            


Epoch #279: test_reward: -9156.500000 ± 14857.231848, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #280: 128001it [01:43, 1232.64it/s, agent0/loss=71558.961, env_step=35840000, len=512, n/ep=60, n/st=25600, rew=-9473.33]                             


Epoch #280: test_reward: -7495.775000 ± 13892.965185, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #281: 128001it [01:43, 1239.00it/s, agent0/loss=71616.591, env_step=35968000, len=512, n/ep=40, n/st=25600, rew=-8490.90]                             


Epoch #281: test_reward: -7494.225000 ± 13893.797880, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #282: 128001it [01:42, 1248.82it/s, agent0/loss=71651.505, env_step=36096000, len=512, n/ep=60, n/st=25600, rew=-9360.69]                            


Epoch #282: test_reward: -8327.150000 ± 14406.546898, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #283: 128001it [01:44, 1225.62it/s, agent0/loss=71674.398, env_step=36224000, len=512, n/ep=40, n/st=25600, rew=-9258.89]                             


Epoch #283: test_reward: -5004.025000 ± 11878.308847, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #284: 128001it [01:43, 1236.63it/s, agent0/loss=71609.667, env_step=36352000, len=512, n/ep=60, n/st=25600, rew=-10198.70]                            


Epoch #284: test_reward: -7495.575000 ± 13893.072250, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #285: 128001it [01:43, 1231.08it/s, agent0/loss=71574.141, env_step=36480000, len=512, n/ep=40, n/st=25600, rew=-8267.17]                            


Epoch #285: test_reward: -8326.125000 ± 14407.137740, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #286: 128001it [01:43, 1234.81it/s, agent0/loss=71634.086, env_step=36608000, len=512, n/ep=60, n/st=25600, rew=-9633.88]                            


Epoch #286: test_reward: -6663.725000 ± 13308.152646, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #287: 128001it [01:42, 1245.53it/s, agent0/loss=71797.088, env_step=36736000, len=512, n/ep=40, n/st=25600, rew=-9135.39]                             


Epoch #287: test_reward: -5835.975000 ± 12639.811615, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #288: 128001it [01:44, 1229.91it/s, agent0/loss=71981.744, env_step=36864000, len=512, n/ep=60, n/st=25600, rew=-8886.36]                             


Epoch #288: test_reward: -9990.225000 ± 15246.747593, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #289: 128001it [01:43, 1234.92it/s, agent0/loss=71980.388, env_step=36992000, len=512, n/ep=40, n/st=25600, rew=-8797.95]                            


Epoch #289: test_reward: -6663.575000 ± 13308.227076, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #290: 128001it [01:43, 1235.45it/s, agent0/loss=71883.657, env_step=37120000, len=512, n/ep=60, n/st=25600, rew=-9721.90]                            


Epoch #290: test_reward: -8333.250000 ± 14403.032130, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #291: 128001it [01:43, 1241.28it/s, agent0/loss=71889.288, env_step=37248000, len=512, n/ep=40, n/st=25600, rew=-8957.35]                            


Epoch #291: test_reward: -8327.675000 ± 14406.245197, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #292: 128001it [01:43, 1236.27it/s, agent0/loss=71774.615, env_step=37376000, len=512, n/ep=60, n/st=25600, rew=-8397.14]                            


Epoch #292: test_reward: -7496.600000 ± 13892.520536, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #293: 128001it [01:43, 1233.23it/s, agent0/loss=71732.735, env_step=37504000, len=512, n/ep=40, n/st=25600, rew=-8462.46]                            


Epoch #293: test_reward: -9988.325000 ± 15247.987879, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #294: 128001it [01:43, 1233.49it/s, agent0/loss=71950.264, env_step=37632000, len=512, n/ep=60, n/st=25600, rew=-10141.22]                            


Epoch #294: test_reward: -7495.825000 ± 13892.938416, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #295: 128001it [01:43, 1238.25it/s, agent0/loss=72060.464, env_step=37760000, len=512, n/ep=40, n/st=25600, rew=-8155.01]                            


Epoch #295: test_reward: -9987.150000 ± 15248.755798, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #296: 128001it [01:44, 1227.97it/s, agent0/loss=71935.299, env_step=37888000, len=512, n/ep=60, n/st=25600, rew=-9947.10]                            


Epoch #296: test_reward: -7497.425000 ± 13892.078703, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #297: 128001it [01:43, 1235.43it/s, agent0/loss=71799.263, env_step=38016000, len=512, n/ep=40, n/st=25600, rew=-8135.64]                             


Epoch #297: test_reward: -7496.925000 ± 13892.346421, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #298: 128001it [01:42, 1252.52it/s, agent0/loss=71931.776, env_step=38144000, len=512, n/ep=60, n/st=25600, rew=-8398.68]                            


Epoch #298: test_reward: -9993.325000 ± 15244.722220, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #299: 128001it [01:43, 1240.41it/s, agent0/loss=72118.441, env_step=38272000, len=512, n/ep=40, n/st=25600, rew=-9440.75]                            


Epoch #299: test_reward: -6666.625000 ± 13306.706468, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #300: 128001it [01:43, 1236.58it/s, agent0/loss=72196.307, env_step=38400000, len=512, n/ep=60, n/st=25600, rew=-8621.84]                            


Epoch #300: test_reward: -5001.375000 ± 11879.418941, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #301: 128001it [01:43, 1236.81it/s, agent0/loss=72081.498, env_step=38528000, len=512, n/ep=40, n/st=25600, rew=-8394.26]                            


Epoch #301: test_reward: -5004.250000 ± 11878.215124, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #302: 128001it [01:43, 1231.47it/s, agent0/loss=71807.018, env_step=38656000, len=512, n/ep=60, n/st=25600, rew=-8298.04]                            


Epoch #302: test_reward: -10822.225000 ± 15583.206149, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #303: 128001it [01:43, 1240.86it/s, agent0/loss=71959.629, env_step=38784000, len=512, n/ep=40, n/st=25600, rew=-9515.25]                            


Epoch #303: test_reward: -6671.200000 ± 13304.423958, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #304: 128001it [01:42, 1243.52it/s, agent0/loss=72237.344, env_step=38912000, len=512, n/ep=60, n/st=25600, rew=-8313.71]                            


Epoch #304: test_reward: -9156.775000 ± 14857.063493, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #305: 128001it [01:43, 1234.36it/s, agent0/loss=72184.031, env_step=39040000, len=512, n/ep=40, n/st=25600, rew=-8621.81]                            


Epoch #305: test_reward: -9990.025000 ± 15246.877858, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #306: 128001it [01:43, 1234.17it/s, agent0/loss=72192.697, env_step=39168000, len=512, n/ep=60, n/st=25600, rew=-9126.83]                             


Epoch #306: test_reward: -6668.475000 ± 13305.784470, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #307: 128001it [01:43, 1238.29it/s, agent0/loss=72284.194, env_step=39296000, len=512, n/ep=40, n/st=25600, rew=-9564.49]                            


Epoch #307: test_reward: -9156.725000 ± 14857.094097, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #308: 128001it [01:44, 1230.52it/s, agent0/loss=72355.931, env_step=39424000, len=512, n/ep=60, n/st=25600, rew=-8473.41]                            


Epoch #308: test_reward: -12485.850000 ± 16107.088270, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #309: 128001it [01:44, 1230.53it/s, agent0/loss=72279.477, env_step=39552000, len=512, n/ep=40, n/st=25600, rew=-8837.86]                            


Epoch #309: test_reward: -9159.725000 ± 14855.250782, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #310: 128001it [01:43, 1237.32it/s, agent0/loss=72191.533, env_step=39680000, len=512, n/ep=60, n/st=25600, rew=-10037.87]                            


Epoch #310: test_reward: -9159.725000 ± 14855.250789, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #311: 128001it [01:43, 1231.91it/s, agent0/loss=72292.315, env_step=39808000, len=512, n/ep=40, n/st=25600, rew=-10233.09]                            


Epoch #311: test_reward: -9156.575000 ± 14857.185923, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #312: 128001it [01:43, 1237.16it/s, agent0/loss=72318.995, env_step=39936000, len=512, n/ep=60, n/st=25600, rew=-9673.76]                             


Epoch #312: test_reward: -9157.950000 ± 14856.340958, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #313: 128001it [01:43, 1238.34it/s, agent0/loss=72212.628, env_step=40064000, len=512, n/ep=40, n/st=25600, rew=-8493.27]                            


Epoch #313: test_reward: -11648.000000 ± 15873.548312, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #314: 128001it [01:43, 1230.80it/s, agent0/loss=72208.407, env_step=40192000, len=512, n/ep=60, n/st=25600, rew=-11366.00]                            


Epoch #314: test_reward: -8324.725000 ± 14407.944041, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #315: 128001it [01:42, 1250.37it/s, agent0/loss=72375.477, env_step=40320000, len=512, n/ep=40, n/st=25600, rew=-8960.71]                             


Epoch #315: test_reward: -9162.600000 ± 14853.483246, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #316: 128001it [01:44, 1227.44it/s, agent0/loss=72484.702, env_step=40448000, len=512, n/ep=60, n/st=25600, rew=-7842.32]                            


Epoch #316: test_reward: -10819.200000 ± 15585.300644, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #317: 128001it [01:43, 1237.15it/s, agent0/loss=72476.501, env_step=40576000, len=512, n/ep=40, n/st=25600, rew=-9211.56]                             


Epoch #317: test_reward: -6668.400000 ± 13305.821686, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #318: 128001it [01:43, 1241.50it/s, agent0/loss=72498.544, env_step=40704000, len=512, n/ep=60, n/st=25600, rew=-8429.99]                            


Epoch #318: test_reward: -7497.175000 ± 13892.212536, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #319: 128001it [01:44, 1227.47it/s, agent0/loss=72512.050, env_step=40832000, len=512, n/ep=40, n/st=25600, rew=-10976.56]                            


Epoch #319: test_reward: -5831.875000 ± 12641.694908, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #320: 128001it [01:43, 1236.00it/s, agent0/loss=72619.210, env_step=40960000, len=512, n/ep=60, n/st=25600, rew=-9326.50]                             


Epoch #320: test_reward: -6666.275000 ± 13306.880491, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #321: 128001it [01:43, 1236.85it/s, agent0/loss=72784.186, env_step=41088000, len=512, n/ep=40, n/st=25600, rew=-9268.52]                            


Epoch #321: test_reward: -8329.475000 ± 14405.208950, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #322: 128001it [01:42, 1242.92it/s, agent0/loss=72730.694, env_step=41216000, len=512, n/ep=60, n/st=25600, rew=-9568.20]                             


Epoch #322: test_reward: -7495.825000 ± 13892.938406, best_reward: -3287.125000 ± 9998.145916 in #154


Epoch #323: 128001it [01:44, 1223.47it/s, agent0/loss=72550.593, env_step=41344000, len=512, n/ep=40, n/st=25600, rew=-8619.39]                            


Best Saved
Epoch #323: test_reward: -2509.850000 ± 8761.757029, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #324: 128001it [01:42, 1244.19it/s, agent0/loss=72424.205, env_step=41472000, len=512, n/ep=60, n/st=25600, rew=-8460.12]                             


Epoch #324: test_reward: -4170.450000 ± 11002.398597, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #325: 128001it [01:43, 1234.47it/s, agent0/loss=72417.279, env_step=41600000, len=512, n/ep=40, n/st=25600, rew=-9686.39]                            


Epoch #325: test_reward: -10821.900000 ± 15583.430731, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #326: 128001it [01:43, 1234.71it/s, agent0/loss=72533.455, env_step=41728000, len=512, n/ep=60, n/st=25600, rew=-9268.88]                            


Epoch #326: test_reward: -6666.925000 ± 13306.557538, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #327: 128001it [01:44, 1226.83it/s, agent0/loss=72557.902, env_step=41856000, len=512, n/ep=40, n/st=25600, rew=-9054.17]                             


Epoch #327: test_reward: -9993.450000 ± 15244.640788, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #328: 128001it [01:44, 1220.88it/s, agent0/loss=72507.023, env_step=41984000, len=512, n/ep=60, n/st=25600, rew=-9130.57]                            


Epoch #328: test_reward: -9156.600000 ± 14857.170619, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #329: 128001it [01:43, 1235.51it/s, agent0/loss=72654.020, env_step=42112000, len=512, n/ep=40, n/st=25600, rew=-8556.92]                             


Epoch #329: test_reward: -7501.975000 ± 13889.632105, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #330: 128001it [01:44, 1228.83it/s, agent0/loss=72741.086, env_step=42240000, len=512, n/ep=60, n/st=25600, rew=-9741.52]                            


Epoch #330: test_reward: -9159.675000 ± 14855.281398, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #331: 128001it [01:42, 1248.83it/s, agent0/loss=72628.747, env_step=42368000, len=512, n/ep=40, n/st=25600, rew=-8802.85]                            


Epoch #331: test_reward: -7498.625000 ± 13891.432991, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #332: 128001it [01:43, 1239.73it/s, agent0/loss=72620.052, env_step=42496000, len=512, n/ep=60, n/st=25600, rew=-9818.82]                             


Epoch #332: test_reward: -7495.875000 ± 13892.911652, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #333: 128001it [01:43, 1233.93it/s, agent0/loss=72734.826, env_step=42624000, len=512, n/ep=40, n/st=25600, rew=-9691.19]                             


Epoch #333: test_reward: -8324.650000 ± 14407.987050, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #334: 128001it [01:43, 1237.60it/s, agent0/loss=72752.332, env_step=42752000, len=512, n/ep=60, n/st=25600, rew=-10044.61]                            


Epoch #334: test_reward: -9161.025000 ± 14854.451438, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #335: 128001it [01:43, 1233.86it/s, agent0/loss=72725.621, env_step=42880000, len=512, n/ep=40, n/st=25600, rew=-9030.51]                            


Epoch #335: test_reward: -9988.800000 ± 15247.678570, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #336: 128001it [01:43, 1232.76it/s, agent0/loss=72828.832, env_step=43008000, len=512, n/ep=60, n/st=25600, rew=-9743.62]                             


Epoch #336: test_reward: -6669.500000 ± 13305.272030, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #337: 128001it [01:43, 1238.74it/s, agent0/loss=72860.931, env_step=43136000, len=512, n/ep=40, n/st=25600, rew=-9045.98]                            


Epoch #337: test_reward: -7498.575000 ± 13891.459754, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #338: 128001it [01:43, 1241.32it/s, agent0/loss=72922.313, env_step=43264000, len=512, n/ep=60, n/st=25600, rew=-9382.63]                             


Epoch #338: test_reward: -9161.450000 ± 14854.190972, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #339: 128001it [01:42, 1250.81it/s, agent0/loss=73067.913, env_step=43392000, len=512, n/ep=40, n/st=25600, rew=-9295.08]                            


Epoch #339: test_reward: -8324.475000 ± 14408.087423, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #340: 128001it [01:43, 1240.07it/s, agent0/loss=73070.002, env_step=43520000, len=512, n/ep=60, n/st=25600, rew=-9113.00]                            


Epoch #340: test_reward: -6664.850000 ± 13307.591113, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #341: 128001it [01:43, 1234.63it/s, agent0/loss=73015.719, env_step=43648000, len=512, n/ep=40, n/st=25600, rew=-8787.62]                            


Epoch #341: test_reward: -8327.550000 ± 14406.315428, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #342: 128001it [01:42, 1243.68it/s, agent0/loss=72844.739, env_step=43776000, len=512, n/ep=60, n/st=25600, rew=-7440.89]                             


Epoch #342: test_reward: -13313.600000 ± 16302.500343, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #343: 128001it [01:43, 1239.94it/s, agent0/loss=72645.304, env_step=43904000, len=512, n/ep=40, n/st=25600, rew=-8473.08]                             


Epoch #343: test_reward: -7498.875000 ± 13891.299076, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #344: 128001it [01:43, 1233.78it/s, agent0/loss=72784.483, env_step=44032000, len=512, n/ep=60, n/st=25600, rew=-9844.42]                             


Epoch #344: test_reward: -6665.050000 ± 13307.491741, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #345: 128001it [01:43, 1232.29it/s, agent0/loss=72982.615, env_step=44160000, len=512, n/ep=40, n/st=25600, rew=-9825.30]                             


Epoch #345: test_reward: -5831.825000 ± 12641.717733, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #346: 128001it [01:42, 1245.27it/s, agent0/loss=72687.419, env_step=44288000, len=512, n/ep=60, n/st=25600, rew=-9438.25]                             


Epoch #346: test_reward: -9990.250000 ± 15246.731312, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #347: 128001it [01:43, 1234.70it/s, agent0/loss=72505.582, env_step=44416000, len=512, n/ep=40, n/st=25600, rew=-9982.90]                            


Epoch #347: test_reward: -6665.300000 ± 13307.367629, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #348: 128001it [01:42, 1251.51it/s, agent0/loss=72792.332, env_step=44544000, len=512, n/ep=60, n/st=25600, rew=-8646.73]                            


Epoch #348: test_reward: -9155.125000 ± 14858.076647, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #349: 128001it [01:41, 1255.56it/s, agent0/loss=72969.475, env_step=44672000, len=512, n/ep=40, n/st=25600, rew=-8233.46]                            


Epoch #349: test_reward: -9993.075000 ± 15244.885143, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #350: 128001it [01:44, 1229.49it/s, agent0/loss=73060.298, env_step=44800000, len=512, n/ep=60, n/st=25600, rew=-7788.06]                             


Epoch #350: test_reward: -6662.100000 ± 13308.962239, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #351: 128001it [01:43, 1231.34it/s, agent0/loss=73234.763, env_step=44928000, len=512, n/ep=40, n/st=25600, rew=-8027.70]                            


Epoch #351: test_reward: -7491.075000 ± 13895.489357, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #352: 128001it [01:43, 1239.84it/s, agent0/loss=73206.640, env_step=45056000, len=512, n/ep=60, n/st=25600, rew=-9274.90]                            


Epoch #352: test_reward: -9990.075000 ± 15246.845333, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #353: 128001it [01:43, 1239.31it/s, agent0/loss=73070.543, env_step=45184000, len=512, n/ep=40, n/st=25600, rew=-9895.84]                             


Epoch #353: test_reward: -4169.400000 ± 11002.794781, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #354: 128001it [01:43, 1242.33it/s, agent0/loss=73110.804, env_step=45312000, len=512, n/ep=60, n/st=25600, rew=-9656.37]                             


Epoch #354: test_reward: -8325.950000 ± 14407.238151, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #355: 128001it [01:43, 1239.57it/s, agent0/loss=73227.944, env_step=45440000, len=512, n/ep=40, n/st=25600, rew=-9629.61]                             


Epoch #355: test_reward: -6668.450000 ± 13305.796863, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #356: 128001it [01:44, 1227.75it/s, agent0/loss=73368.613, env_step=45568000, len=512, n/ep=60, n/st=25600, rew=-9101.42]                             


Epoch #356: test_reward: -7495.675000 ± 13893.018712, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #357: 128001it [01:44, 1230.26it/s, agent0/loss=73347.225, env_step=45696000, len=512, n/ep=40, n/st=25600, rew=-8755.25]                             


Epoch #357: test_reward: -7495.150000 ± 13893.300217, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #358: 128001it [01:43, 1237.14it/s, agent0/loss=73242.676, env_step=45824000, len=512, n/ep=60, n/st=25600, rew=-8583.24]                             


Epoch #358: test_reward: -9156.500000 ± 14857.231860, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #359: 128001it [01:42, 1243.45it/s, agent0/loss=73202.889, env_step=45952000, len=512, n/ep=40, n/st=25600, rew=-10677.58]                            


Epoch #359: test_reward: -7498.700000 ± 13891.392778, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #360: 128001it [01:44, 1229.28it/s, agent0/loss=73264.138, env_step=46080000, len=512, n/ep=60, n/st=25600, rew=-8627.06]                            


Epoch #360: test_reward: -9157.875000 ± 14856.386977, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #361: 128001it [01:42, 1243.85it/s, agent0/loss=73331.964, env_step=46208000, len=512, n/ep=40, n/st=25600, rew=-8951.70]                            


Epoch #361: test_reward: -8327.625000 ± 14406.273947, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #362: 128001it [01:43, 1238.00it/s, agent0/loss=73349.287, env_step=46336000, len=512, n/ep=60, n/st=25600, rew=-8184.74]                            


Epoch #362: test_reward: -8325.975000 ± 14407.223821, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #363: 128001it [01:43, 1234.32it/s, agent0/loss=73342.748, env_step=46464000, len=512, n/ep=40, n/st=25600, rew=-9270.23]                            


Epoch #363: test_reward: -7493.400000 ± 13894.240135, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #364: 128001it [01:43, 1231.62it/s, agent0/loss=73281.793, env_step=46592000, len=512, n/ep=60, n/st=25600, rew=-7921.80]                            


Epoch #364: test_reward: -10820.625000 ± 15584.313906, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #365: 128001it [01:43, 1233.99it/s, agent0/loss=73366.395, env_step=46720000, len=512, n/ep=40, n/st=25600, rew=-8570.54]                            


Epoch #365: test_reward: -8327.550000 ± 14406.316934, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #366: 128001it [01:42, 1245.04it/s, agent0/loss=73512.090, env_step=46848000, len=512, n/ep=60, n/st=25600, rew=-9266.26]                             


Epoch #366: test_reward: -9159.325000 ± 14855.496142, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #367: 128001it [01:42, 1246.98it/s, agent0/loss=73478.769, env_step=46976000, len=512, n/ep=40, n/st=25600, rew=-11633.85]                            


Epoch #367: test_reward: -8328.775000 ± 14405.610811, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #368: 128001it [01:43, 1230.97it/s, agent0/loss=73271.598, env_step=47104000, len=512, n/ep=60, n/st=25600, rew=-8825.33]                            


Epoch #368: test_reward: -6666.175000 ± 13306.930512, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #369: 128001it [01:43, 1231.43it/s, agent0/loss=73277.993, env_step=47232000, len=512, n/ep=40, n/st=25600, rew=-6752.14]                             


Epoch #369: test_reward: -8324.675000 ± 14407.972712, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #370: 128001it [01:43, 1237.64it/s, agent0/loss=73507.330, env_step=47360000, len=512, n/ep=60, n/st=25600, rew=-8244.75]                            


Epoch #370: test_reward: -5838.700000 ± 12638.559115, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #371: 128001it [01:44, 1230.60it/s, agent0/loss=73587.703, env_step=47488000, len=512, n/ep=40, n/st=25600, rew=-8644.48]                            


Epoch #371: test_reward: -10821.800000 ± 15583.499954, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #372: 128001it [01:44, 1225.19it/s, agent0/loss=73532.259, env_step=47616000, len=512, n/ep=60, n/st=25600, rew=-9006.01]                            


Epoch #372: test_reward: -5837.950000 ± 12638.905235, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #373: 128001it [01:43, 1240.75it/s, agent0/loss=73553.363, env_step=47744000, len=512, n/ep=40, n/st=25600, rew=-9795.73]                            


Epoch #373: test_reward: -6671.150000 ± 13304.448783, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #374: 128001it [01:43, 1232.64it/s, agent0/loss=73703.911, env_step=47872000, len=512, n/ep=60, n/st=25600, rew=-9015.78]                            


Epoch #374: test_reward: -6663.750000 ± 13308.140253, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #375: 128001it [01:43, 1239.62it/s, agent0/loss=73803.847, env_step=48000000, len=512, n/ep=40, n/st=25600, rew=-8984.06]                            


Epoch #375: test_reward: -8329.250000 ± 14405.338097, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #376: 128001it [01:43, 1238.12it/s, agent0/loss=73802.496, env_step=48128000, len=512, n/ep=60, n/st=25600, rew=-8450.87]                             


Epoch #376: test_reward: -9161.225000 ± 14854.328836, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #377: 128001it [01:43, 1239.31it/s, agent0/loss=73768.318, env_step=48256000, len=512, n/ep=40, n/st=25600, rew=-11557.40]                            


Epoch #377: test_reward: -5002.250000 ± 11879.051317, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #378: 128001it [01:43, 1234.04it/s, agent0/loss=73724.838, env_step=48384000, len=512, n/ep=60, n/st=25600, rew=-8063.60]                             


Epoch #378: test_reward: -9988.275000 ± 15248.020429, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #379: 128001it [01:44, 1227.47it/s, agent0/loss=73775.584, env_step=48512000, len=512, n/ep=40, n/st=25600, rew=-9186.59]                            


Epoch #379: test_reward: -9987.025000 ± 15248.837178, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #380: 128001it [01:43, 1232.85it/s, agent0/loss=73722.782, env_step=48640000, len=512, n/ep=60, n/st=25600, rew=-8827.10]                            


Epoch #380: test_reward: -8330.325000 ± 14404.718148, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #381: 128001it [01:43, 1237.66it/s, agent0/loss=73674.381, env_step=48768000, len=512, n/ep=40, n/st=25600, rew=-7971.85]                            


Epoch #381: test_reward: -7500.225000 ± 13890.572917, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #382: 128001it [01:44, 1225.20it/s, agent0/loss=73819.829, env_step=48896000, len=512, n/ep=60, n/st=25600, rew=-9738.67]                            


Epoch #382: test_reward: -8330.975000 ± 14404.344630, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #383: 128001it [01:43, 1234.24it/s, agent0/loss=73883.260, env_step=49024000, len=512, n/ep=40, n/st=25600, rew=-9138.26]                            


Epoch #383: test_reward: -9159.775000 ± 14855.220161, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #384: 128001it [01:43, 1230.93it/s, agent0/loss=73930.132, env_step=49152000, len=512, n/ep=60, n/st=25600, rew=-8448.89]                            


Epoch #384: test_reward: -8329.100000 ± 14405.424244, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #385: 128001it [01:43, 1234.74it/s, agent0/loss=73940.997, env_step=49280000, len=512, n/ep=40, n/st=25600, rew=-9184.30]                             


Epoch #385: test_reward: -8329.275000 ± 14405.323761, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #386: 128001it [01:43, 1233.33it/s, agent0/loss=73973.820, env_step=49408000, len=512, n/ep=60, n/st=25600, rew=-11423.50]                            


Epoch #386: test_reward: -7493.825000 ± 13894.011906, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #387: 128001it [01:43, 1231.13it/s, agent0/loss=74078.076, env_step=49536000, len=512, n/ep=40, n/st=25600, rew=-8876.02]                            


Epoch #387: test_reward: -5002.600000 ± 11878.905534, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #388: 128001it [01:43, 1238.84it/s, agent0/loss=74070.738, env_step=49664000, len=512, n/ep=60, n/st=25600, rew=-9453.24]                            


Epoch #388: test_reward: -7495.900000 ± 13892.898277, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #389: 128001it [01:43, 1240.67it/s, agent0/loss=73955.614, env_step=49792000, len=512, n/ep=40, n/st=25600, rew=-10892.74]                            


Epoch #389: test_reward: -12483.125000 ± 16109.195683, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #390: 128001it [01:42, 1245.23it/s, agent0/loss=73705.758, env_step=49920000, len=512, n/ep=60, n/st=25600, rew=-11159.03]                            


Epoch #390: test_reward: -6668.300000 ± 13305.871402, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #391: 128001it [01:43, 1232.92it/s, agent0/loss=73464.841, env_step=50048000, len=512, n/ep=40, n/st=25600, rew=-8117.21]                             


Epoch #391: test_reward: -5831.675000 ± 12641.786214, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #392: 128001it [01:44, 1230.27it/s, agent0/loss=73654.619, env_step=50176000, len=512, n/ep=60, n/st=25600, rew=-8598.09]                             


Epoch #392: test_reward: -8328.900000 ± 14405.539018, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #393: 128001it [01:43, 1239.95it/s, agent0/loss=73963.214, env_step=50304000, len=512, n/ep=40, n/st=25600, rew=-8516.48]                            


Epoch #393: test_reward: -7500.075000 ± 13890.653396, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #394: 128001it [01:43, 1231.65it/s, agent0/loss=74037.916, env_step=50432000, len=512, n/ep=60, n/st=25600, rew=-7246.91]                            


Epoch #394: test_reward: -10820.550000 ± 15584.365690, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #395: 128001it [01:43, 1239.43it/s, agent0/loss=74076.214, env_step=50560000, len=512, n/ep=40, n/st=25600, rew=-9621.16]                            


Epoch #395: test_reward: -9159.675000 ± 14855.281413, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #396: 128001it [01:43, 1234.30it/s, agent0/loss=74174.653, env_step=50688000, len=512, n/ep=60, n/st=25600, rew=-8922.94]                             


Epoch #396: test_reward: -8328.625000 ± 14405.697383, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #397: 128001it [01:43, 1240.34it/s, agent0/loss=74267.728, env_step=50816000, len=512, n/ep=40, n/st=25600, rew=-6895.93]                            


Epoch #397: test_reward: -9991.700000 ± 15245.783865, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #398: 128001it [01:43, 1241.26it/s, agent0/loss=74200.105, env_step=50944000, len=512, n/ep=60, n/st=25600, rew=-8566.52]                            


Epoch #398: test_reward: -8323.200000 ± 14408.821831, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #399: 128001it [01:43, 1238.58it/s, agent0/loss=74022.541, env_step=51072000, len=512, n/ep=40, n/st=25600, rew=-6869.88]                            


Epoch #399: test_reward: -9158.225000 ± 14856.172534, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #400: 128001it [01:43, 1239.22it/s, agent0/loss=73893.920, env_step=51200000, len=512, n/ep=60, n/st=25600, rew=-8670.65]                             


Epoch #400: test_reward: -5831.850000 ± 12641.706320, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #401: 128001it [01:43, 1231.39it/s, agent0/loss=73834.597, env_step=51328000, len=512, n/ep=40, n/st=25600, rew=-8993.60]                             


Epoch #401: test_reward: -10820.625000 ± 15584.313929, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #402: 128001it [01:43, 1237.16it/s, agent0/loss=73935.722, env_step=51456000, len=512, n/ep=60, n/st=25600, rew=-8751.20]                             


Epoch #402: test_reward: -5836.550000 ± 12639.548722, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #403: 128001it [01:44, 1229.00it/s, agent0/loss=73554.978, env_step=51584000, len=512, n/ep=40, n/st=25600, rew=-8955.81]                             


Epoch #403: test_reward: -7496.475000 ± 13892.587675, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #404: 128001it [01:43, 1236.20it/s, agent0/loss=73214.461, env_step=51712000, len=512, n/ep=60, n/st=25600, rew=-9312.08]                             


Epoch #404: test_reward: -9159.800000 ± 14855.204849, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #405: 128001it [01:43, 1232.93it/s, agent0/loss=73713.921, env_step=51840000, len=512, n/ep=40, n/st=25600, rew=-7642.02]                            


Epoch #405: test_reward: -7494.200000 ± 13893.811250, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #406: 128001it [01:43, 1238.70it/s, agent0/loss=74172.906, env_step=51968000, len=512, n/ep=60, n/st=25600, rew=-7282.10]                            


Epoch #406: test_reward: -6666.275000 ± 13306.880463, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #407: 128001it [01:43, 1233.68it/s, agent0/loss=74440.270, env_step=52096000, len=512, n/ep=40, n/st=25600, rew=-8254.45]                            


Epoch #407: test_reward: -6665.300000 ± 13307.367629, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #408: 128001it [01:42, 1247.21it/s, agent0/loss=74547.026, env_step=52224000, len=512, n/ep=60, n/st=25600, rew=-8583.12]                            


Epoch #408: test_reward: -5836.125000 ± 12639.742933, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #409: 128001it [01:43, 1238.11it/s, agent0/loss=74513.425, env_step=52352000, len=512, n/ep=40, n/st=25600, rew=-9377.52]                             


Epoch #409: test_reward: -6658.975000 ± 13310.518772, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #410: 128001it [01:43, 1231.47it/s, agent0/loss=74491.252, env_step=52480000, len=512, n/ep=60, n/st=25600, rew=-9395.93]                             


Epoch #410: test_reward: -7498.900000 ± 13891.285689, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #411: 128001it [01:44, 1227.67it/s, agent0/loss=74494.725, env_step=52608000, len=512, n/ep=40, n/st=25600, rew=-8068.88]                            


Epoch #411: test_reward: -8329.450000 ± 14405.223296, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #412: 128001it [01:43, 1239.08it/s, agent0/loss=74330.479, env_step=52736000, len=512, n/ep=60, n/st=25600, rew=-8142.32]                            


Epoch #412: test_reward: -8329.200000 ± 14405.366795, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #413: 128001it [01:43, 1233.68it/s, agent0/loss=74170.961, env_step=52864000, len=512, n/ep=40, n/st=25600, rew=-12589.54]                            


Epoch #413: test_reward: -7498.975000 ± 13891.245539, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #414: 128001it [01:45, 1215.44it/s, agent0/loss=74193.565, env_step=52992000, len=512, n/ep=60, n/st=25600, rew=-9344.88]                             


Epoch #414: test_reward: -8329.175000 ± 14405.381125, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #415: 128001it [01:43, 1239.34it/s, agent0/loss=74123.352, env_step=53120000, len=512, n/ep=40, n/st=25600, rew=-9897.80]                             


Epoch #415: test_reward: -6668.275000 ± 13305.883772, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #416: 128001it [01:42, 1244.62it/s, agent0/loss=74241.776, env_step=53248000, len=512, n/ep=60, n/st=25600, rew=-7335.91]                            


Epoch #416: test_reward: -9159.825000 ± 14855.189531, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #417: 128001it [01:43, 1237.41it/s, agent0/loss=74506.368, env_step=53376000, len=512, n/ep=40, n/st=25600, rew=-6472.21]                            


Epoch #417: test_reward: -9994.825000 ± 15243.741778, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #418: 128001it [01:44, 1230.26it/s, agent0/loss=74680.183, env_step=53504000, len=512, n/ep=60, n/st=25600, rew=-10411.69]                            


Epoch #418: test_reward: -10819.200000 ± 15585.300644, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #419: 128001it [01:44, 1224.82it/s, agent0/loss=74751.962, env_step=53632000, len=512, n/ep=40, n/st=25600, rew=-10314.77]                            


Epoch #419: test_reward: -9162.900000 ± 14853.299397, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #420: 128001it [01:44, 1228.43it/s, agent0/loss=74604.178, env_step=53760000, len=512, n/ep=60, n/st=25600, rew=-9348.51]                             


Epoch #420: test_reward: -5002.825000 ± 11878.811897, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #421: 128001it [01:43, 1235.16it/s, agent0/loss=74432.589, env_step=53888000, len=512, n/ep=40, n/st=25600, rew=-10132.17]                            


Epoch #421: test_reward: -6662.200000 ± 13308.912638, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #422: 128001it [01:44, 1220.65it/s, agent0/loss=74445.216, env_step=54016000, len=512, n/ep=60, n/st=25600, rew=-8218.19]                             


Epoch #422: test_reward: -6662.050000 ± 13308.987059, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #423: 128001it [01:43, 1233.99it/s, agent0/loss=74562.821, env_step=54144000, len=512, n/ep=40, n/st=25600, rew=-8540.81]                            


Epoch #423: test_reward: -7503.075000 ± 13889.039811, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #424: 128001it [01:43, 1237.48it/s, agent0/loss=74702.988, env_step=54272000, len=512, n/ep=60, n/st=25600, rew=-9175.27]                             


Epoch #424: test_reward: -6666.975000 ± 13306.532718, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #425: 128001it [01:44, 1230.40it/s, agent0/loss=74788.124, env_step=54400000, len=512, n/ep=40, n/st=25600, rew=-7491.54]                            


Epoch #425: test_reward: -9158.225000 ± 14856.172540, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #426: 128001it [01:43, 1234.18it/s, agent0/loss=74764.631, env_step=54528000, len=512, n/ep=60, n/st=25600, rew=-7757.90]                            


Epoch #426: test_reward: -6662.125000 ± 13308.949837, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #427: 128001it [01:43, 1239.97it/s, agent0/loss=74485.252, env_step=54656000, len=512, n/ep=40, n/st=25600, rew=-7546.77]                            


Epoch #427: test_reward: -9988.625000 ± 15247.792499, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #428: 128001it [01:44, 1228.91it/s, agent0/loss=74151.216, env_step=54784000, len=512, n/ep=60, n/st=25600, rew=-9115.56]                            


Epoch #428: test_reward: -8326.075000 ± 14407.166486, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #429: 128001it [01:43, 1237.45it/s, agent0/loss=74261.564, env_step=54912000, len=512, n/ep=40, n/st=25600, rew=-9182.70]                            


Epoch #429: test_reward: -7494.175000 ± 13893.824617, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #430: 128001it [01:43, 1235.56it/s, agent0/loss=74582.502, env_step=55040000, len=512, n/ep=60, n/st=25600, rew=-9224.31]                             


Epoch #430: test_reward: -9988.650000 ± 15247.776219, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #431: 128001it [01:43, 1232.76it/s, agent0/loss=74678.303, env_step=55168000, len=512, n/ep=40, n/st=25600, rew=-7363.25]                            


Epoch #431: test_reward: -10821.500000 ± 15583.707251, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #432: 128001it [01:44, 1227.75it/s, agent0/loss=74714.953, env_step=55296000, len=512, n/ep=60, n/st=25600, rew=-10149.91]                            


Epoch #432: test_reward: -7496.875000 ± 13892.373352, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #433: 128001it [01:42, 1244.28it/s, agent0/loss=74791.827, env_step=55424000, len=512, n/ep=40, n/st=25600, rew=-8696.08]                            


Epoch #433: test_reward: -10818.850000 ± 15585.542272, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #434: 128001it [01:43, 1240.48it/s, agent0/loss=74836.503, env_step=55552000, len=512, n/ep=60, n/st=25600, rew=-9250.72]                            


Epoch #434: test_reward: -6670.000000 ± 13305.023546, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #435: 128001it [01:43, 1239.72it/s, agent0/loss=74875.485, env_step=55680000, len=512, n/ep=40, n/st=25600, rew=-9573.31]                            


Epoch #435: test_reward: -6666.025000 ± 13307.004760, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #436: 128001it [01:43, 1234.13it/s, agent0/loss=74766.478, env_step=55808000, len=512, n/ep=60, n/st=25600, rew=-8155.88]                             


Epoch #436: test_reward: -10822.300000 ± 15583.154358, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #437: 128001it [01:42, 1243.17it/s, agent0/loss=74546.863, env_step=55936000, len=512, n/ep=40, n/st=25600, rew=-9942.75]                            


Epoch #437: test_reward: -7495.950000 ± 13892.871526, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #438: 128001it [01:42, 1245.08it/s, agent0/loss=74505.751, env_step=56064000, len=512, n/ep=60, n/st=25600, rew=-8683.79]                             


Epoch #438: test_reward: -7500.450000 ± 13890.452325, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #439: 128001it [01:43, 1238.04it/s, agent0/loss=74657.306, env_step=56192000, len=512, n/ep=40, n/st=25600, rew=-9070.50]                            


Epoch #439: test_reward: -9162.725000 ± 14853.406650, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #440: 128001it [01:43, 1234.30it/s, agent0/loss=74868.051, env_step=56320000, len=512, n/ep=60, n/st=25600, rew=-9635.79]                             


Epoch #440: test_reward: -8327.750000 ± 14406.202178, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #441: 128001it [01:43, 1238.31it/s, agent0/loss=75047.896, env_step=56448000, len=512, n/ep=40, n/st=25600, rew=-9335.33]                            


Epoch #441: test_reward: -9987.050000 ± 15248.820884, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #442: 128001it [01:42, 1248.09it/s, agent0/loss=75163.546, env_step=56576000, len=512, n/ep=60, n/st=25600, rew=-9064.35]                            


Epoch #442: test_reward: -4172.175000 ± 11001.749799, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #443: 128001it [01:43, 1234.85it/s, agent0/loss=75184.582, env_step=56704000, len=512, n/ep=40, n/st=25600, rew=-9269.54]                             


Epoch #443: test_reward: -5002.550000 ± 11878.926380, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #444: 128001it [01:42, 1244.28it/s, agent0/loss=75041.089, env_step=56832000, len=512, n/ep=60, n/st=25600, rew=-10016.16]                            


Epoch #444: test_reward: -7494.150000 ± 13893.838016, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #445: 128001it [01:41, 1256.39it/s, agent0/loss=74923.636, env_step=56960000, len=512, n/ep=40, n/st=25600, rew=-9623.00]                            


Epoch #445: test_reward: -8327.775000 ± 14406.187821, best_reward: -2509.850000 ± 8761.757029 in #323


Epoch #446: 128001it [01:42, 1244.73it/s, agent0/loss=74922.373, env_step=57088000, len=512, n/ep=60, n/st=25600, rew=-9157.13]                            


Best Saved
Epoch #446: test_reward: -2509.725000 ± 8761.792011, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #447: 128001it [01:43, 1236.68it/s, agent0/loss=74974.839, env_step=57216000, len=512, n/ep=40, n/st=25600, rew=-8995.50]                             


Epoch #447: test_reward: -7497.000000 ± 13892.306198, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #448: 128001it [01:43, 1241.23it/s, agent0/loss=75091.438, env_step=57344000, len=512, n/ep=60, n/st=25600, rew=-9467.48]                            


Epoch #448: test_reward: -7497.200000 ± 13892.199146, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #449: 128001it [01:43, 1238.44it/s, agent0/loss=75227.381, env_step=57472000, len=512, n/ep=40, n/st=25600, rew=-10582.55]                            


Epoch #449: test_reward: -6663.475000 ± 13308.276729, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #450: 128001it [01:42, 1245.76it/s, agent0/loss=75296.004, env_step=57600000, len=512, n/ep=60, n/st=25600, rew=-8222.08]                            


Epoch #450: test_reward: -10825.425000 ± 15580.989890, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #451: 128001it [01:43, 1234.70it/s, agent0/loss=75259.716, env_step=57728000, len=512, n/ep=40, n/st=25600, rew=-9665.27]                            


Epoch #451: test_reward: -6666.975000 ± 13306.532718, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #452: 128001it [01:43, 1242.21it/s, agent0/loss=75297.245, env_step=57856000, len=512, n/ep=60, n/st=25600, rew=-10217.17]                            


Epoch #452: test_reward: -5836.100000 ± 12639.754542, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #453: 128001it [01:42, 1245.58it/s, agent0/loss=75359.730, env_step=57984000, len=512, n/ep=40, n/st=25600, rew=-9138.30]                            


Epoch #453: test_reward: -11650.875000 ± 15871.443477, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #454: 128001it [01:42, 1243.94it/s, agent0/loss=75276.818, env_step=58112000, len=512, n/ep=60, n/st=25600, rew=-8701.70]                             


Epoch #454: test_reward: -10820.425000 ± 15584.452018, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #455: 128001it [01:43, 1242.51it/s, agent0/loss=75279.381, env_step=58240000, len=512, n/ep=40, n/st=25600, rew=-8860.79]                             


Epoch #455: test_reward: -7499.050000 ± 13891.205381, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #456: 128001it [01:43, 1238.69it/s, agent0/loss=75299.735, env_step=58368000, len=512, n/ep=60, n/st=25600, rew=-9478.11]                            


Epoch #456: test_reward: -7495.550000 ± 13893.085600, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #457: 128001it [01:43, 1235.20it/s, agent0/loss=75212.244, env_step=58496000, len=512, n/ep=40, n/st=25600, rew=-7359.40]                            


Epoch #457: test_reward: -9161.225000 ± 14854.328816, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #458: 128001it [01:44, 1228.74it/s, agent0/loss=75161.560, env_step=58624000, len=512, n/ep=60, n/st=25600, rew=-9736.96]                            


Epoch #458: test_reward: -5834.975000 ± 12640.271958, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #459: 128001it [01:42, 1242.83it/s, agent0/loss=75244.488, env_step=58752000, len=512, n/ep=40, n/st=25600, rew=-7503.65]                            


Epoch #459: test_reward: -8329.475000 ± 14405.208948, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #460: 128001it [01:43, 1232.72it/s, agent0/loss=75442.894, env_step=58880000, len=512, n/ep=60, n/st=25600, rew=-8680.89]                            


Epoch #460: test_reward: -8327.475000 ± 14406.359988, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #461: 128001it [01:43, 1235.79it/s, agent0/loss=75557.126, env_step=59008000, len=512, n/ep=40, n/st=25600, rew=-10013.70]                            


Epoch #461: test_reward: -11652.750000 ± 15870.071149, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #462: 128001it [01:43, 1234.33it/s, agent0/loss=75568.811, env_step=59136000, len=512, n/ep=60, n/st=25600, rew=-6645.31]                            


Epoch #462: test_reward: -8326.200000 ± 14407.094739, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #463: 128001it [01:43, 1231.74it/s, agent0/loss=75646.208, env_step=59264000, len=512, n/ep=40, n/st=25600, rew=-7623.91]                             


Epoch #463: test_reward: -6662.225000 ± 13308.900241, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #464: 128001it [01:42, 1245.00it/s, agent0/loss=75539.509, env_step=59392000, len=512, n/ep=60, n/st=25600, rew=-10190.58]                            


Epoch #464: test_reward: -7497.325000 ± 13892.132249, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #465: 128001it [01:43, 1238.62it/s, agent0/loss=75385.226, env_step=59520000, len=512, n/ep=40, n/st=25600, rew=-8294.41]                            


Epoch #465: test_reward: -9161.425000 ± 14854.206293, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #466: 128001it [01:43, 1241.71it/s, agent0/loss=75339.221, env_step=59648000, len=512, n/ep=60, n/st=25600, rew=-8843.48]                             


Epoch #466: test_reward: -5836.275000 ± 12639.674412, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #467: 128001it [01:43, 1241.45it/s, agent0/loss=75365.766, env_step=59776000, len=512, n/ep=40, n/st=25600, rew=-5929.46]                            


Epoch #467: test_reward: -9156.675000 ± 14857.124704, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #468: 128001it [01:43, 1234.78it/s, agent0/loss=75339.783, env_step=59904000, len=512, n/ep=60, n/st=25600, rew=-8781.33]                            


Epoch #468: test_reward: -7495.450000 ± 13893.139186, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #469: 128001it [01:44, 1229.19it/s, agent0/loss=75188.499, env_step=60032000, len=512, n/ep=40, n/st=25600, rew=-8518.76]                            


Epoch #469: test_reward: -5000.375000 ± 11879.835183, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #470: 128001it [01:42, 1247.30it/s, agent0/loss=75345.839, env_step=60160000, len=512, n/ep=60, n/st=25600, rew=-9689.02]                            


Epoch #470: test_reward: -9990.275000 ± 15246.715033, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #471: 128001it [01:41, 1254.93it/s, agent0/loss=75629.580, env_step=60288000, len=512, n/ep=40, n/st=25600, rew=-9103.51]                            


Epoch #471: test_reward: -9158.125000 ± 14856.233783, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #472: 128001it [01:43, 1238.42it/s, agent0/loss=75650.576, env_step=60416000, len=512, n/ep=60, n/st=25600, rew=-8930.69]                            


Epoch #472: test_reward: -8329.250000 ± 14405.338095, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #473: 128001it [01:43, 1238.32it/s, agent0/loss=75596.734, env_step=60544000, len=512, n/ep=40, n/st=25600, rew=-8873.69]                            


Epoch #473: test_reward: -11649.600000 ± 15872.377334, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #474: 128001it [01:43, 1236.05it/s, agent0/loss=75691.320, env_step=60672000, len=512, n/ep=60, n/st=25600, rew=-9631.49]                            


Epoch #474: test_reward: -8332.475000 ± 14403.480100, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #475: 128001it [01:43, 1235.78it/s, agent0/loss=75809.131, env_step=60800000, len=512, n/ep=40, n/st=25600, rew=-8112.39]                            


Epoch #475: test_reward: -10822.125000 ± 15583.275200, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #476: 128001it [01:43, 1241.67it/s, agent0/loss=75755.308, env_step=60928000, len=512, n/ep=60, n/st=25600, rew=-8692.06]                             


Epoch #476: test_reward: -5835.825000 ± 12639.880227, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #477: 128001it [01:43, 1236.67it/s, agent0/loss=75676.184, env_step=61056000, len=512, n/ep=40, n/st=25600, rew=-8002.43]                             


Epoch #477: test_reward: -9158.275000 ± 14856.141923, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #478: 128001it [01:43, 1231.55it/s, agent0/loss=75700.232, env_step=61184000, len=512, n/ep=60, n/st=25600, rew=-7667.49]                            


Epoch #478: test_reward: -8327.175000 ± 14406.532319, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #479: 128001it [01:43, 1235.10it/s, agent0/loss=75791.201, env_step=61312000, len=512, n/ep=40, n/st=25600, rew=-10290.95]                            


Epoch #479: test_reward: -7493.575000 ± 13894.146314, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #480: 128001it [01:43, 1240.88it/s, agent0/loss=75974.342, env_step=61440000, len=512, n/ep=60, n/st=25600, rew=-7822.81]                            


Epoch #480: test_reward: -6666.675000 ± 13306.681704, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #481: 128001it [01:44, 1226.65it/s, agent0/loss=76109.969, env_step=61568000, len=512, n/ep=40, n/st=25600, rew=-8749.88]                            


Epoch #481: test_reward: -8327.600000 ± 14406.288236, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #482: 128001it [01:43, 1238.27it/s, agent0/loss=76070.108, env_step=61696000, len=512, n/ep=60, n/st=25600, rew=-9529.13]                             


Epoch #482: test_reward: -8328.725000 ± 14405.639764, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #483: 128001it [01:43, 1235.48it/s, agent0/loss=75958.559, env_step=61824000, len=512, n/ep=40, n/st=25600, rew=-9716.38]                             


Epoch #483: test_reward: -9155.150000 ± 14858.061347, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #484: 128001it [01:42, 1242.82it/s, agent0/loss=75927.079, env_step=61952000, len=512, n/ep=60, n/st=25600, rew=-8916.73]                            


Epoch #484: test_reward: -9156.700000 ± 14857.109401, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #485: 128001it [01:44, 1227.04it/s, agent0/loss=75924.463, env_step=62080000, len=512, n/ep=40, n/st=25600, rew=-9878.51]                             


Epoch #485: test_reward: -9158.125000 ± 14856.233796, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #486: 128001it [01:43, 1232.36it/s, agent0/loss=75937.845, env_step=62208000, len=512, n/ep=60, n/st=25600, rew=-8234.54]                            


Epoch #486: test_reward: -11654.225000 ± 15868.990755, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #487: 128001it [01:43, 1236.07it/s, agent0/loss=75917.855, env_step=62336000, len=512, n/ep=40, n/st=25600, rew=-8609.74]                            


Epoch #487: test_reward: -6664.975000 ± 13307.528930, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #488: 128001it [01:42, 1248.15it/s, agent0/loss=75921.051, env_step=62464000, len=512, n/ep=60, n/st=25600, rew=-10332.43]                            


Epoch #488: test_reward: -7497.150000 ± 13892.225915, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #489: 128001it [01:43, 1237.62it/s, agent0/loss=75916.220, env_step=62592000, len=512, n/ep=40, n/st=25600, rew=-8458.56]                             


Epoch #489: test_reward: -5004.300000 ± 11878.194478, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #490: 128001it [01:44, 1228.53it/s, agent0/loss=75937.052, env_step=62720000, len=512, n/ep=60, n/st=25600, rew=-8946.92]                            


Epoch #490: test_reward: -9988.725000 ± 15247.727395, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #491: 128001it [01:44, 1229.30it/s, agent0/loss=76090.259, env_step=62848000, len=512, n/ep=40, n/st=25600, rew=-9634.92]                             


Epoch #491: test_reward: -9991.800000 ± 15245.718731, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #492: 128001it [01:43, 1240.67it/s, agent0/loss=76257.488, env_step=62976000, len=512, n/ep=60, n/st=25600, rew=-9523.75]                            


Epoch #492: test_reward: -6663.950000 ± 13308.041032, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #493: 128001it [01:44, 1226.15it/s, agent0/loss=76353.247, env_step=63104000, len=512, n/ep=40, n/st=25600, rew=-8238.00]                            


Epoch #493: test_reward: -9990.075000 ± 15246.845276, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #494: 128001it [01:43, 1235.99it/s, agent0/loss=76059.166, env_step=63232000, len=512, n/ep=60, n/st=25600, rew=-9335.78]                             


Epoch #494: test_reward: -5836.000000 ± 12639.800236, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #495: 128001it [01:42, 1245.61it/s, agent0/loss=75352.014, env_step=63360000, len=512, n/ep=40, n/st=25600, rew=-8306.74]                             


Epoch #495: test_reward: -8323.175000 ± 14408.836162, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #496: 128001it [01:44, 1229.19it/s, agent0/loss=75364.692, env_step=63488000, len=512, n/ep=60, n/st=25600, rew=-8094.82]                            


Epoch #496: test_reward: -9993.225000 ± 15244.787356, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #497: 128001it [01:44, 1229.67it/s, agent0/loss=76043.074, env_step=63616000, len=512, n/ep=40, n/st=25600, rew=-8097.62]                            


Epoch #497: test_reward: -7494.300000 ± 13893.757752, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #498: 128001it [01:44, 1230.76it/s, agent0/loss=76120.162, env_step=63744000, len=512, n/ep=60, n/st=25600, rew=-7743.84]                            


Epoch #498: test_reward: -7492.525000 ± 13894.710610, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #499: 128001it [01:43, 1235.10it/s, agent0/loss=75977.535, env_step=63872000, len=512, n/ep=40, n/st=25600, rew=-7258.60]                             


Epoch #499: test_reward: -9987.100000 ± 15248.788340, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #500: 128001it [01:44, 1226.59it/s, agent0/loss=76199.795, env_step=64000000, len=512, n/ep=60, n/st=25600, rew=-9481.66]                             


Epoch #500: test_reward: -7497.400000 ± 13892.092083, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #501: 128001it [01:44, 1229.77it/s, agent0/loss=76374.957, env_step=64128000, len=512, n/ep=40, n/st=25600, rew=-9363.20]                            


Epoch #501: test_reward: -9991.325000 ± 15246.028459, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #502: 128001it [01:43, 1232.93it/s, agent0/loss=76325.928, env_step=64256000, len=512, n/ep=60, n/st=25600, rew=-8138.89]                            


Epoch #502: test_reward: -4175.625000 ± 11000.451492, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #503: 128001it [01:43, 1241.84it/s, agent0/loss=76249.794, env_step=64384000, len=512, n/ep=40, n/st=25600, rew=-7097.25]                             


Epoch #503: test_reward: -9158.300000 ± 14856.126617, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #504: 128001it [01:43, 1232.50it/s, agent0/loss=76314.656, env_step=64512000, len=512, n/ep=60, n/st=25600, rew=-8589.40]                            


Epoch #504: test_reward: -6666.850000 ± 13306.594840, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #505: 128001it [01:44, 1227.90it/s, agent0/loss=76364.593, env_step=64640000, len=512, n/ep=40, n/st=25600, rew=-8297.54]                            


Epoch #505: test_reward: -6664.825000 ± 13307.603511, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #506: 128001it [01:48, 1182.10it/s, agent0/loss=76286.687, env_step=64768000, len=512, n/ep=60, n/st=25600, rew=-8002.79]                            


Epoch #506: test_reward: -8330.875000 ± 14404.402041, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #507: 128001it [01:45, 1207.84it/s, agent0/loss=76333.875, env_step=64896000, len=512, n/ep=40, n/st=25600, rew=-9553.62]                            


Epoch #507: test_reward: -10820.525000 ± 15584.382978, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #508: 128001it [01:46, 1197.00it/s, agent0/loss=76457.202, env_step=65024000, len=512, n/ep=60, n/st=25600, rew=-8757.41]                            


Epoch #508: test_reward: -8327.625000 ± 14406.273924, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #509: 128001it [01:44, 1220.23it/s, agent0/loss=76350.116, env_step=65152000, len=512, n/ep=40, n/st=25600, rew=-9227.81]                             


Epoch #509: test_reward: -8326.150000 ± 14407.123401, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #510: 128001it [01:46, 1203.15it/s, agent0/loss=76254.411, env_step=65280000, len=512, n/ep=60, n/st=25600, rew=-8817.22]                            


Epoch #510: test_reward: -8329.275000 ± 14405.323736, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #511: 128001it [01:50, 1155.83it/s, agent0/loss=76357.981, env_step=65408000, len=512, n/ep=40, n/st=25600, rew=-9539.83]                            


Epoch #511: test_reward: -7497.475000 ± 13892.051947, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #512: 128001it [01:46, 1201.52it/s, agent0/loss=76470.668, env_step=65536000, len=512, n/ep=60, n/st=25600, rew=-7482.08]                            


Epoch #512: test_reward: -7495.450000 ± 13893.139316, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #513: 128001it [01:46, 1203.35it/s, agent0/loss=76407.978, env_step=65664000, len=512, n/ep=40, n/st=25600, rew=-9777.39]                            


Epoch #513: test_reward: -10819.100000 ± 15585.369665, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #514: 128001it [01:47, 1195.94it/s, agent0/loss=76271.785, env_step=65792000, len=512, n/ep=60, n/st=25600, rew=-9305.00]                            


Epoch #514: test_reward: -5836.475000 ± 12639.582980, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #515: 128001it [01:45, 1214.42it/s, agent0/loss=76342.320, env_step=65920000, len=512, n/ep=40, n/st=25600, rew=-9322.04]                            


Epoch #515: test_reward: -5831.850000 ± 12641.706322, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #516: 128001it [01:45, 1210.93it/s, agent0/loss=76471.738, env_step=66048000, len=512, n/ep=60, n/st=25600, rew=-10041.09]                            


Epoch #516: test_reward: -5831.700000 ± 12641.774805, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #517: 128001it [01:46, 1202.03it/s, agent0/loss=76574.612, env_step=66176000, len=512, n/ep=40, n/st=25600, rew=-9468.65]                            


Epoch #517: test_reward: -8331.000000 ± 14404.330273, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #518: 128001it [01:49, 1173.72it/s, agent0/loss=76712.086, env_step=66304000, len=512, n/ep=60, n/st=25600, rew=-7285.93]                            


Epoch #518: test_reward: -11651.150000 ± 15871.242614, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #519: 128001it [01:47, 1185.53it/s, agent0/loss=76831.369, env_step=66432000, len=512, n/ep=40, n/st=25600, rew=-11525.80]                            


Epoch #519: test_reward: -7496.275000 ± 13892.694994, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #520: 128001it [01:47, 1191.18it/s, agent0/loss=76815.673, env_step=66560000, len=512, n/ep=60, n/st=25600, rew=-10091.88]                            


Epoch #520: test_reward: -9155.075000 ± 14858.107244, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #521: 128001it [01:47, 1194.99it/s, agent0/loss=76694.631, env_step=66688000, len=512, n/ep=40, n/st=25600, rew=-8173.94]                            


Epoch #521: test_reward: -9160.900000 ± 14854.528070, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #522: 128001it [01:47, 1195.47it/s, agent0/loss=76637.284, env_step=66816000, len=512, n/ep=60, n/st=25600, rew=-9042.77]                            


Epoch #522: test_reward: -11649.525000 ± 15872.432085, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #523: 128001it [01:45, 1217.21it/s, agent0/loss=76713.427, env_step=66944000, len=512, n/ep=40, n/st=25600, rew=-8182.46]                            


Epoch #523: test_reward: -7500.250000 ± 13890.559465, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #524: 128001it [01:47, 1194.32it/s, agent0/loss=76867.197, env_step=67072000, len=512, n/ep=60, n/st=25600, rew=-10053.29]                            


Epoch #524: test_reward: -11652.650000 ± 15870.144184, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #525: 128001it [01:46, 1207.19it/s, agent0/loss=76873.412, env_step=67200000, len=512, n/ep=40, n/st=25600, rew=-9933.36]                             


Epoch #525: test_reward: -6660.750000 ± 13309.635244, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #526: 128001it [01:46, 1203.13it/s, agent0/loss=76601.223, env_step=67328000, len=512, n/ep=60, n/st=25600, rew=-8878.58]                            


Epoch #526: test_reward: -7492.375000 ± 13894.790843, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #527: 128001it [01:46, 1197.48it/s, agent0/loss=76658.694, env_step=67456000, len=512, n/ep=40, n/st=25600, rew=-7253.75]                             


Epoch #527: test_reward: -9157.550000 ± 14856.586087, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #528: 128001it [01:45, 1216.01it/s, agent0/loss=76949.302, env_step=67584000, len=512, n/ep=60, n/st=25600, rew=-7833.32]                             


Epoch #528: test_reward: -9156.650000 ± 14857.140008, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #529: 128001it [01:48, 1181.15it/s, agent0/loss=76934.026, env_step=67712000, len=512, n/ep=40, n/st=25600, rew=-9268.51]                             


Epoch #529: test_reward: -6664.750000 ± 13307.640810, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #530: 128001it [01:48, 1179.84it/s, agent0/loss=76875.635, env_step=67840000, len=512, n/ep=60, n/st=25600, rew=-8618.23]                            


Epoch #530: test_reward: -7495.475000 ± 13893.125764, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #531: 128001it [01:45, 1211.51it/s, agent0/loss=76916.516, env_step=67968000, len=512, n/ep=40, n/st=25600, rew=-8053.69]                            


Epoch #531: test_reward: -9990.300000 ± 15246.698756, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #532: 128001it [01:48, 1175.16it/s, agent0/loss=77005.972, env_step=68096000, len=512, n/ep=60, n/st=25600, rew=-9547.09]                            


Epoch #532: test_reward: -7495.650000 ± 13893.032093, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #533: 128001it [01:49, 1172.67it/s, agent0/loss=76985.405, env_step=68224000, len=512, n/ep=40, n/st=25600, rew=-8935.41]                             


Epoch #533: test_reward: -12482.925000 ± 16109.349958, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #534: 128001it [01:50, 1163.47it/s, agent0/loss=76770.811, env_step=68352000, len=512, n/ep=60, n/st=25600, rew=-7108.30]                             


Epoch #534: test_reward: -9159.575000 ± 14855.342761, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #535: 128001it [01:48, 1177.08it/s, agent0/loss=76625.577, env_step=68480000, len=512, n/ep=40, n/st=25600, rew=-5885.93]                             


Epoch #535: test_reward: -8327.875000 ± 14406.130440, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #536: 128001it [01:48, 1180.96it/s, agent0/loss=76889.268, env_step=68608000, len=512, n/ep=60, n/st=25600, rew=-9420.77]                            


Epoch #536: test_reward: -9155.150000 ± 14858.061347, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #537: 128001it [01:46, 1205.11it/s, agent0/loss=77085.300, env_step=68736000, len=512, n/ep=40, n/st=25600, rew=-8753.24]                             


Epoch #537: test_reward: -9988.600000 ± 15247.808790, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #538: 128001it [01:47, 1195.04it/s, agent0/loss=77087.185, env_step=68864000, len=512, n/ep=60, n/st=25600, rew=-9855.79]                            


Epoch #538: test_reward: -3335.775000 ± 9981.429218, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #539: 128001it [01:48, 1184.60it/s, agent0/loss=77131.323, env_step=68992000, len=512, n/ep=40, n/st=25600, rew=-6788.69]                            


Epoch #539: test_reward: -5833.425000 ± 12640.983563, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #540: 128001it [01:47, 1186.41it/s, agent0/loss=77048.219, env_step=69120000, len=512, n/ep=60, n/st=25600, rew=-8451.77]                            


Epoch #540: test_reward: -9158.125000 ± 14856.233790, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #541: 128001it [01:48, 1185.05it/s, agent0/loss=76976.255, env_step=69248000, len=512, n/ep=40, n/st=25600, rew=-7736.57]                             


Epoch #541: test_reward: -5002.825000 ± 11878.811956, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #542: 128001it [01:47, 1185.41it/s, agent0/loss=77062.775, env_step=69376000, len=512, n/ep=60, n/st=25600, rew=-7535.88]                            


Epoch #542: test_reward: -9159.400000 ± 14855.449860, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #543: 128001it [01:48, 1182.98it/s, agent0/loss=77187.027, env_step=69504000, len=512, n/ep=40, n/st=25600, rew=-8686.96]                            


Epoch #543: test_reward: -5830.200000 ± 12642.463073, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #544: 128001it [01:49, 1168.37it/s, agent0/loss=77309.879, env_step=69632000, len=512, n/ep=60, n/st=25600, rew=-9786.93]                            


Epoch #544: test_reward: -10820.400000 ± 15584.469368, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #545: 128001it [01:48, 1181.98it/s, agent0/loss=77375.303, env_step=69760000, len=512, n/ep=40, n/st=25600, rew=-10230.70]                            


Epoch #545: test_reward: -8329.350000 ± 14405.280682, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #546: 128001it [01:48, 1178.24it/s, agent0/loss=77219.766, env_step=69888000, len=512, n/ep=60, n/st=25600, rew=-11642.91]                            


Epoch #546: test_reward: -9991.750000 ± 15245.751319, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #547: 128001it [01:49, 1166.16it/s, agent0/loss=77110.770, env_step=70016000, len=512, n/ep=40, n/st=25600, rew=-9667.42]                            


Epoch #547: test_reward: -9993.275000 ± 15244.754805, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #548: 128001it [01:47, 1194.39it/s, agent0/loss=77210.212, env_step=70144000, len=512, n/ep=60, n/st=25600, rew=-8924.12]                            


Epoch #548: test_reward: -8329.125000 ± 14405.409842, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #549: 128001it [01:49, 1171.62it/s, agent0/loss=77385.199, env_step=70272000, len=512, n/ep=40, n/st=25600, rew=-10532.86]                            


Epoch #549: test_reward: -11652.350000 ± 15870.363412, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #550: 128001it [01:49, 1168.97it/s, agent0/loss=77341.635, env_step=70400000, len=512, n/ep=60, n/st=25600, rew=-8480.42]                             


Epoch #550: test_reward: -5837.825000 ± 12638.962388, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #551: 128001it [01:48, 1176.52it/s, agent0/loss=77293.763, env_step=70528000, len=512, n/ep=40, n/st=25600, rew=-9355.61]                            


Epoch #551: test_reward: -9156.725000 ± 14857.094100, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #552: 128001it [01:50, 1159.38it/s, agent0/loss=77483.995, env_step=70656000, len=512, n/ep=60, n/st=25600, rew=-8925.80]                            


Epoch #552: test_reward: -8327.700000 ± 14406.230847, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #553: 128001it [01:50, 1162.52it/s, agent0/loss=77587.684, env_step=70784000, len=512, n/ep=40, n/st=25600, rew=-9096.34]                            


Epoch #553: test_reward: -10823.375000 ± 15582.409053, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #554: 128001it [01:47, 1190.21it/s, agent0/loss=77448.238, env_step=70912000, len=512, n/ep=60, n/st=25600, rew=-8657.47]                            


Epoch #554: test_reward: -7498.475000 ± 13891.513366, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #555: 128001it [01:50, 1158.72it/s, agent0/loss=77290.726, env_step=71040000, len=512, n/ep=40, n/st=25600, rew=-8445.49]                             


Epoch #555: test_reward: -10825.325000 ± 15581.058967, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #556: 128001it [01:48, 1179.43it/s, agent0/loss=77414.485, env_step=71168000, len=512, n/ep=60, n/st=25600, rew=-8738.88]                             


Epoch #556: test_reward: -9158.000000 ± 14856.310331, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #557: 128001it [01:47, 1186.93it/s, agent0/loss=77616.602, env_step=71296000, len=512, n/ep=40, n/st=25600, rew=-7183.41]                            


Epoch #557: test_reward: -9156.725000 ± 14857.094097, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #558: 128001it [01:49, 1169.83it/s, agent0/loss=77612.559, env_step=71424000, len=512, n/ep=60, n/st=25600, rew=-8934.40]                            


Epoch #558: test_reward: -10822.075000 ± 15583.309757, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #559: 128001it [01:47, 1188.84it/s, agent0/loss=77487.090, env_step=71552000, len=512, n/ep=40, n/st=25600, rew=-9570.09]                            


Epoch #559: test_reward: -9158.175000 ± 14856.203157, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #560: 128001it [01:49, 1171.82it/s, agent0/loss=77519.386, env_step=71680000, len=512, n/ep=60, n/st=25600, rew=-9124.52]                             


Epoch #560: test_reward: -5002.325000 ± 11879.019981, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #561: 128001it [01:49, 1171.56it/s, agent0/loss=77580.726, env_step=71808000, len=512, n/ep=40, n/st=25600, rew=-7949.71]                            


Epoch #561: test_reward: -8327.575000 ± 14406.302593, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #562: 128001it [01:48, 1181.65it/s, agent0/loss=77697.632, env_step=71936000, len=512, n/ep=60, n/st=25600, rew=-9661.46]                             


Epoch #562: test_reward: -8329.075000 ± 14405.438533, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #563: 128001it [01:48, 1181.12it/s, agent0/loss=77862.642, env_step=72064000, len=512, n/ep=40, n/st=25600, rew=-9675.48]                            


Epoch #563: test_reward: -7492.750000 ± 13894.590253, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #564: 128001it [01:48, 1176.88it/s, agent0/loss=77791.993, env_step=72192000, len=512, n/ep=60, n/st=25600, rew=-7890.18]                             


Epoch #564: test_reward: -9157.900000 ± 14856.371633, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #565: 128001it [01:48, 1175.05it/s, agent0/loss=77647.840, env_step=72320000, len=512, n/ep=40, n/st=25600, rew=-9738.38]                             


Epoch #565: test_reward: -8324.775000 ± 14407.915367, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #566: 128001it [01:48, 1180.62it/s, agent0/loss=77608.928, env_step=72448000, len=512, n/ep=60, n/st=25600, rew=-8179.04]                            


Epoch #566: test_reward: -9990.150000 ± 15246.796446, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #567: 128001it [01:47, 1187.02it/s, agent0/loss=77730.840, env_step=72576000, len=512, n/ep=40, n/st=25600, rew=-7947.93]                            


Epoch #567: test_reward: -7496.925000 ± 13892.346530, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #568: 128001it [01:49, 1171.52it/s, agent0/loss=77790.484, env_step=72704000, len=512, n/ep=60, n/st=25600, rew=-6820.31]                            


Epoch #568: test_reward: -7495.775000 ± 13892.965170, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #569: 128001it [01:46, 1196.49it/s, agent0/loss=77735.681, env_step=72832000, len=512, n/ep=40, n/st=25600, rew=-6875.88]                            


Epoch #569: test_reward: -10818.900000 ± 15585.507725, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #570: 128001it [01:49, 1171.32it/s, agent0/loss=77572.117, env_step=72960000, len=512, n/ep=60, n/st=25600, rew=-9305.62]                            


Epoch #570: test_reward: -8327.750000 ± 14406.202166, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #571: 128001it [01:48, 1174.86it/s, agent0/loss=77642.409, env_step=73088000, len=512, n/ep=40, n/st=25600, rew=-10302.15]                            


Epoch #571: test_reward: -5837.850000 ± 12638.950923, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #572: 128001it [01:48, 1184.58it/s, agent0/loss=77908.070, env_step=73216000, len=512, n/ep=60, n/st=25600, rew=-9056.62]                             


Epoch #572: test_reward: -7499.875000 ± 13890.760440, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #573: 128001it [01:48, 1174.69it/s, agent0/loss=77988.119, env_step=73344000, len=512, n/ep=40, n/st=25600, rew=-9761.69]                            


Epoch #573: test_reward: -7497.075000 ± 13892.266105, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #574: 128001it [01:47, 1195.97it/s, agent0/loss=78031.628, env_step=73472000, len=512, n/ep=60, n/st=25600, rew=-9817.17]                            


Epoch #574: test_reward: -9985.600000 ± 15249.767704, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #575: 128001it [01:48, 1175.02it/s, agent0/loss=77946.466, env_step=73600000, len=512, n/ep=40, n/st=25600, rew=-8989.20]                            


Epoch #575: test_reward: -9989.975000 ± 15246.910448, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #576: 128001it [01:49, 1173.52it/s, agent0/loss=77922.090, env_step=73728000, len=512, n/ep=60, n/st=25600, rew=-10444.91]                            


Epoch #576: test_reward: -6668.400000 ± 13305.821686, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #577: 128001it [01:46, 1197.71it/s, agent0/loss=77981.349, env_step=73856000, len=512, n/ep=40, n/st=25600, rew=-8174.02]                            


Epoch #577: test_reward: -9159.575000 ± 14855.342685, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #578: 128001it [01:46, 1203.00it/s, agent0/loss=77914.204, env_step=73984000, len=512, n/ep=60, n/st=25600, rew=-7980.93]                            


Epoch #578: test_reward: -7495.450000 ± 13893.139112, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #579: 128001it [01:48, 1182.67it/s, agent0/loss=77858.858, env_step=74112000, len=512, n/ep=40, n/st=25600, rew=-8433.33]                            


Epoch #579: test_reward: -8327.575000 ± 14406.302582, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #580: 128001it [01:46, 1196.63it/s, agent0/loss=77970.009, env_step=74240000, len=512, n/ep=60, n/st=25600, rew=-9815.54]                            


Epoch #580: test_reward: -8329.375000 ± 14405.266335, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #581: 128001it [01:48, 1181.01it/s, agent0/loss=78051.655, env_step=74368000, len=512, n/ep=40, n/st=25600, rew=-8006.82]                             


Epoch #581: test_reward: -4169.325000 ± 11002.822782, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #582: 128001it [01:48, 1184.21it/s, agent0/loss=77992.914, env_step=74496000, len=512, n/ep=60, n/st=25600, rew=-9841.17]                             


Epoch #582: test_reward: -9988.675000 ± 15247.759946, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #583: 128001it [01:47, 1193.64it/s, agent0/loss=78001.865, env_step=74624000, len=512, n/ep=40, n/st=25600, rew=-8131.62]                            


Epoch #583: test_reward: -9989.825000 ± 15247.008124, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #584: 128001it [01:47, 1187.13it/s, agent0/loss=78113.954, env_step=74752000, len=512, n/ep=60, n/st=25600, rew=-8777.39]                             


Epoch #584: test_reward: -9159.625000 ± 14855.312027, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #585: 128001it [01:48, 1184.19it/s, agent0/loss=78126.998, env_step=74880000, len=512, n/ep=40, n/st=25600, rew=-7336.31]                            


Epoch #585: test_reward: -6663.775000 ± 13308.127848, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #586: 128001it [01:49, 1167.65it/s, agent0/loss=78134.081, env_step=75008000, len=512, n/ep=60, n/st=25600, rew=-8874.23]                             


Epoch #586: test_reward: -6665.175000 ± 13307.429680, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #587: 128001it [01:48, 1184.89it/s, agent0/loss=78173.908, env_step=75136000, len=512, n/ep=40, n/st=25600, rew=-9338.61]                            


Epoch #587: test_reward: -8327.650000 ± 14406.259562, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #588: 128001it [01:49, 1173.52it/s, agent0/loss=78148.785, env_step=75264000, len=512, n/ep=60, n/st=25600, rew=-7553.78]                            


Epoch #588: test_reward: -9990.150000 ± 15246.796451, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #589: 128001it [01:48, 1183.95it/s, agent0/loss=78231.551, env_step=75392000, len=512, n/ep=40, n/st=25600, rew=-9838.08]                            


Epoch #589: test_reward: -9158.000000 ± 14856.310312, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #590: 128001it [01:48, 1176.37it/s, agent0/loss=78295.476, env_step=75520000, len=512, n/ep=60, n/st=25600, rew=-9646.70]                            


Epoch #590: test_reward: -8326.125000 ± 14407.137754, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #591: 128001it [01:48, 1182.14it/s, agent0/loss=78323.480, env_step=75648000, len=512, n/ep=40, n/st=25600, rew=-8293.29]                            


Epoch #591: test_reward: -8328.875000 ± 14405.553537, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #592: 128001it [01:48, 1177.39it/s, agent0/loss=78336.534, env_step=75776000, len=512, n/ep=60, n/st=25600, rew=-7486.99]                            


Epoch #592: test_reward: -8326.275000 ± 14407.051695, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #593: 128001it [01:48, 1180.44it/s, agent0/loss=78344.235, env_step=75904000, len=512, n/ep=40, n/st=25600, rew=-7206.82]                             


Epoch #593: test_reward: -8330.475000 ± 14404.631783, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #594: 128001it [01:48, 1181.93it/s, agent0/loss=78345.147, env_step=76032000, len=512, n/ep=60, n/st=25600, rew=-9007.63]                            


Epoch #594: test_reward: -9159.750000 ± 14855.235464, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #595: 128001it [01:49, 1170.81it/s, agent0/loss=78316.027, env_step=76160000, len=512, n/ep=40, n/st=25600, rew=-7942.80]                             


Epoch #595: test_reward: -6663.825000 ± 13308.103033, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #596: 128001it [01:49, 1165.67it/s, agent0/loss=78297.281, env_step=76288000, len=512, n/ep=60, n/st=25600, rew=-8767.43]                            


Epoch #596: test_reward: -8324.675000 ± 14407.972712, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #597: 128001it [01:47, 1187.73it/s, agent0/loss=78309.642, env_step=76416000, len=512, n/ep=40, n/st=25600, rew=-10087.81]                            


Epoch #597: test_reward: -10817.600000 ± 15586.407945, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #598: 128001it [01:48, 1178.91it/s, agent0/loss=78330.883, env_step=76544000, len=512, n/ep=60, n/st=25600, rew=-7583.95]                             


Epoch #598: test_reward: -13313.600000 ± 16302.500343, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #599: 128001it [01:49, 1172.34it/s, agent0/loss=78281.954, env_step=76672000, len=512, n/ep=40, n/st=25600, rew=-9109.42]                            


Epoch #599: test_reward: -7497.300000 ± 13892.145609, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #600: 128001it [01:46, 1200.41it/s, agent0/loss=78372.324, env_step=76800000, len=512, n/ep=60, n/st=25600, rew=-9512.43]                            


Epoch #600: test_reward: -9988.450000 ± 15247.906448, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #601: 128001it [01:47, 1186.42it/s, agent0/loss=78442.822, env_step=76928000, len=512, n/ep=40, n/st=25600, rew=-7691.56]                            


Epoch #601: test_reward: -9987.175000 ± 15248.739526, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #602: 128001it [01:47, 1193.64it/s, agent0/loss=78395.692, env_step=77056000, len=512, n/ep=60, n/st=25600, rew=-9715.25]                            


Epoch #602: test_reward: -10820.575000 ± 15584.348433, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #603: 128001it [01:48, 1179.45it/s, agent0/loss=78456.638, env_step=77184000, len=512, n/ep=40, n/st=25600, rew=-8548.61]                            


Epoch #603: test_reward: -9993.400000 ± 15244.673350, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #604: 128001it [01:47, 1195.62it/s, agent0/loss=78569.323, env_step=77312000, len=512, n/ep=60, n/st=25600, rew=-8277.50]                            


Epoch #604: test_reward: -5836.550000 ± 12639.548728, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #605: 128001it [01:49, 1173.33it/s, agent0/loss=78627.040, env_step=77440000, len=512, n/ep=40, n/st=25600, rew=-8413.94]                            


Epoch #605: test_reward: -9988.100000 ± 15248.134628, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #606: 128001it [01:48, 1184.78it/s, agent0/loss=78607.494, env_step=77568000, len=512, n/ep=60, n/st=25600, rew=-11226.64]                            


Epoch #606: test_reward: -9153.525000 ± 14859.058957, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #607: 128001it [01:48, 1176.88it/s, agent0/loss=78600.370, env_step=77696000, len=512, n/ep=40, n/st=25600, rew=-9297.08]                            


Epoch #607: test_reward: -12483.100000 ± 16109.214959, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #608: 128001it [01:49, 1167.14it/s, agent0/loss=78522.465, env_step=77824000, len=512, n/ep=60, n/st=25600, rew=-9114.28]                             


Epoch #608: test_reward: -9989.875000 ± 15246.975574, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #609: 128001it [01:49, 1167.99it/s, agent0/loss=78419.323, env_step=77952000, len=512, n/ep=40, n/st=25600, rew=-11330.45]                            


Epoch #609: test_reward: -7498.450000 ± 13891.526696, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #610: 128001it [01:47, 1185.25it/s, agent0/loss=78132.604, env_step=78080000, len=512, n/ep=60, n/st=25600, rew=-9416.48]                             


Epoch #610: test_reward: -5836.475000 ± 12639.583011, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #611: 128001it [01:47, 1196.01it/s, agent0/loss=77867.240, env_step=78208000, len=512, n/ep=40, n/st=25600, rew=-11405.58]                            


Epoch #611: test_reward: -6663.700000 ± 13308.165060, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #612: 128001it [01:47, 1193.33it/s, agent0/loss=78085.418, env_step=78336000, len=512, n/ep=60, n/st=25600, rew=-9143.39]                            


Epoch #612: test_reward: -8329.400000 ± 14405.251990, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #613: 128001it [01:47, 1193.02it/s, agent0/loss=78540.694, env_step=78464000, len=512, n/ep=40, n/st=25600, rew=-11028.58]                            


Epoch #613: test_reward: -5828.700000 ± 12643.151148, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #614: 128001it [01:46, 1200.85it/s, agent0/loss=78842.302, env_step=78592000, len=512, n/ep=60, n/st=25600, rew=-8609.17]                             


Epoch #614: test_reward: -12486.250000 ± 16106.779678, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #615: 128001it [01:46, 1198.79it/s, agent0/loss=78913.393, env_step=78720000, len=512, n/ep=40, n/st=25600, rew=-8825.89]                             


Epoch #615: test_reward: -6666.700000 ± 13306.669268, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #616: 128001it [01:47, 1195.49it/s, agent0/loss=78941.217, env_step=78848000, len=512, n/ep=60, n/st=25600, rew=-7426.19]                             


Epoch #616: test_reward: -8327.700000 ± 14406.230848, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #617: 128001it [01:48, 1182.69it/s, agent0/loss=78941.590, env_step=78976000, len=512, n/ep=40, n/st=25600, rew=-7804.31]                            


Epoch #617: test_reward: -9162.425000 ± 14853.590510, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #618: 128001it [01:48, 1182.66it/s, agent0/loss=78880.080, env_step=79104000, len=512, n/ep=60, n/st=25600, rew=-8754.48]                            


Epoch #618: test_reward: -9988.700000 ± 15247.743668, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #619: 128001it [01:47, 1194.90it/s, agent0/loss=78855.136, env_step=79232000, len=512, n/ep=40, n/st=25600, rew=-9454.45]                            


Epoch #619: test_reward: -8330.925000 ± 14404.373328, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #620: 128001it [01:48, 1183.88it/s, agent0/loss=78960.749, env_step=79360000, len=512, n/ep=60, n/st=25600, rew=-8095.81]                            


Epoch #620: test_reward: -5831.650000 ± 12641.797646, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #621: 128001it [01:47, 1187.13it/s, agent0/loss=78983.963, env_step=79488000, len=512, n/ep=40, n/st=25600, rew=-8330.10]                            


Epoch #621: test_reward: -5833.075000 ± 12641.143412, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #622: 128001it [01:46, 1199.84it/s, agent0/loss=78928.563, env_step=79616000, len=512, n/ep=60, n/st=25600, rew=-9197.51]                            


Epoch #622: test_reward: -6666.675000 ± 13306.681657, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #623: 128001it [01:47, 1194.92it/s, agent0/loss=78940.427, env_step=79744000, len=512, n/ep=40, n/st=25600, rew=-6751.30]                            


Epoch #623: test_reward: -9156.750000 ± 14857.078794, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #624: 128001it [01:47, 1193.01it/s, agent0/loss=78989.621, env_step=79872000, len=512, n/ep=60, n/st=25600, rew=-8593.12]                            


Epoch #624: test_reward: -7502.025000 ± 13889.605310, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #625: 128001it [01:48, 1179.42it/s, agent0/loss=79058.432, env_step=80000000, len=512, n/ep=40, n/st=25600, rew=-7404.77]                            


Epoch #625: test_reward: -6667.775000 ± 13306.132255, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #626: 128001it [01:49, 1166.80it/s, agent0/loss=79105.507, env_step=80128000, len=512, n/ep=60, n/st=25600, rew=-8616.48]                            


Epoch #626: test_reward: -9991.875000 ± 15245.669884, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #627: 128001it [01:48, 1181.12it/s, agent0/loss=79062.769, env_step=80256000, len=512, n/ep=40, n/st=25600, rew=-7873.74]                            


Epoch #627: test_reward: -7494.175000 ± 13893.824615, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #628: 128001it [01:46, 1197.55it/s, agent0/loss=78785.023, env_step=80384000, len=512, n/ep=60, n/st=25600, rew=-8029.88]                            


Epoch #628: test_reward: -9990.250000 ± 15246.731310, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #629: 128001it [01:48, 1184.20it/s, agent0/loss=78611.060, env_step=80512000, len=512, n/ep=40, n/st=25600, rew=-7991.18]                            


Epoch #629: test_reward: -10819.150000 ± 15585.335150, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #630: 128001it [01:47, 1189.36it/s, agent0/loss=78901.079, env_step=80640000, len=512, n/ep=60, n/st=25600, rew=-9770.15]                            


Epoch #630: test_reward: -6663.850000 ± 13308.090632, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #631: 128001it [01:47, 1192.82it/s, agent0/loss=79225.593, env_step=80768000, len=512, n/ep=40, n/st=25600, rew=-7641.25]                            


Epoch #631: test_reward: -6662.350000 ± 13308.838256, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #632: 128001it [01:46, 1201.38it/s, agent0/loss=79219.285, env_step=80896000, len=512, n/ep=60, n/st=25600, rew=-8215.58]                            


Epoch #632: test_reward: -7498.425000 ± 13891.540039, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #633: 128001it [01:49, 1167.97it/s, agent0/loss=78966.896, env_step=81024000, len=512, n/ep=40, n/st=25600, rew=-9365.44]                            


Epoch #633: test_reward: -6665.300000 ± 13307.367621, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #634: 128001it [01:49, 1169.49it/s, agent0/loss=78952.767, env_step=81152000, len=512, n/ep=60, n/st=25600, rew=-8091.87]                            


Epoch #634: test_reward: -7492.700000 ± 13894.616990, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #635: 128001it [01:48, 1181.32it/s, agent0/loss=79195.700, env_step=81280000, len=512, n/ep=40, n/st=25600, rew=-9931.52]                            


Epoch #635: test_reward: -6665.150000 ± 13307.442122, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #636: 128001it [01:47, 1195.27it/s, agent0/loss=79252.257, env_step=81408000, len=512, n/ep=60, n/st=25600, rew=-10189.94]                            


Epoch #636: test_reward: -10822.175000 ± 15583.240666, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #637: 128001it [01:48, 1182.66it/s, agent0/loss=79159.358, env_step=81536000, len=512, n/ep=40, n/st=25600, rew=-7850.62]                            


Epoch #637: test_reward: -9159.750000 ± 14855.235462, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #638: 128001it [01:47, 1192.83it/s, agent0/loss=79231.748, env_step=81664000, len=512, n/ep=60, n/st=25600, rew=-9522.98]                            


Epoch #638: test_reward: -5001.050000 ± 11879.554137, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #639: 128001it [01:47, 1188.85it/s, agent0/loss=79393.989, env_step=81792000, len=512, n/ep=40, n/st=25600, rew=-6790.49]                            


Epoch #639: test_reward: -9993.400000 ± 15244.673358, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #640: 128001it [01:49, 1173.98it/s, agent0/loss=79460.637, env_step=81920000, len=512, n/ep=60, n/st=25600, rew=-10055.66]                            


Epoch #640: test_reward: -9159.800000 ± 14855.204842, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #641: 128001it [01:47, 1191.09it/s, agent0/loss=79481.221, env_step=82048000, len=512, n/ep=40, n/st=25600, rew=-6156.44]                            


Epoch #641: test_reward: -8326.175000 ± 14407.109063, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #642: 128001it [01:48, 1179.94it/s, agent0/loss=79429.749, env_step=82176000, len=512, n/ep=60, n/st=25600, rew=-9718.97]                            


Epoch #642: test_reward: -9156.525000 ± 14857.216548, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #643: 128001it [01:47, 1193.37it/s, agent0/loss=79378.793, env_step=82304000, len=512, n/ep=40, n/st=25600, rew=-10323.42]                            


Epoch #643: test_reward: -5830.750000 ± 12642.208992, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #644: 128001it [01:50, 1162.53it/s, agent0/loss=79136.784, env_step=82432000, len=512, n/ep=60, n/st=25600, rew=-8476.99]                            


Epoch #644: test_reward: -9991.800000 ± 15245.718739, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #645: 128001it [01:47, 1189.73it/s, agent0/loss=79189.339, env_step=82560000, len=512, n/ep=40, n/st=25600, rew=-8589.05]                            


Epoch #645: test_reward: -6668.450000 ± 13305.796848, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #646: 128001it [01:48, 1179.05it/s, agent0/loss=79402.573, env_step=82688000, len=512, n/ep=60, n/st=25600, rew=-8191.39]                            


Epoch #646: test_reward: -9988.725000 ± 15247.727395, best_reward: -2509.725000 ± 8761.792011 in #446


Epoch #647: 128001it [01:49, 1171.79it/s, agent0/loss=79430.147, env_step=82816000, len=512, n/ep=40, n/st=25600, rew=-8182.40]                            


Best Saved
Epoch #647: test_reward: -2506.900000 ± 8762.591611, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #648: 128001it [01:47, 1188.52it/s, agent0/loss=79572.357, env_step=82944000, len=512, n/ep=60, n/st=25600, rew=-8593.83]                            


Epoch #648: test_reward: -6666.800000 ± 13306.619596, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #649: 128001it [01:47, 1191.61it/s, agent0/loss=79600.493, env_step=83072000, len=512, n/ep=40, n/st=25600, rew=-8566.56]                            


Epoch #649: test_reward: -10820.600000 ± 15584.331169, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #650: 128001it [01:47, 1188.17it/s, agent0/loss=79567.905, env_step=83200000, len=512, n/ep=60, n/st=25600, rew=-7864.45]                            


Epoch #650: test_reward: -7498.575000 ± 13891.459846, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #651: 128001it [01:48, 1180.96it/s, agent0/loss=79580.667, env_step=83328000, len=512, n/ep=40, n/st=25600, rew=-9419.67]                            


Epoch #651: test_reward: -5001.125000 ± 11879.522907, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #652: 128001it [01:48, 1179.59it/s, agent0/loss=79612.591, env_step=83456000, len=512, n/ep=60, n/st=25600, rew=-9932.08]                            


Epoch #652: test_reward: -6666.575000 ± 13306.731296, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #653: 128001it [01:47, 1185.94it/s, agent0/loss=79664.465, env_step=83584000, len=512, n/ep=40, n/st=25600, rew=-6673.18]                            


Epoch #653: test_reward: -9159.825000 ± 14855.189531, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #654: 128001it [01:49, 1171.69it/s, agent0/loss=79675.969, env_step=83712000, len=512, n/ep=60, n/st=25600, rew=-9708.79]                            


Epoch #654: test_reward: -9159.125000 ± 14855.618518, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #655: 128001it [01:49, 1169.31it/s, agent0/loss=79639.723, env_step=83840000, len=512, n/ep=40, n/st=25600, rew=-9617.98]                            


Epoch #655: test_reward: -7497.250000 ± 13892.172398, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #656: 128001it [01:48, 1175.76it/s, agent0/loss=79727.548, env_step=83968000, len=512, n/ep=60, n/st=25600, rew=-9265.76]                            


Epoch #656: test_reward: -8322.850000 ± 14409.022539, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #657: 128001it [01:48, 1179.11it/s, agent0/loss=79766.281, env_step=84096000, len=512, n/ep=40, n/st=25600, rew=-8703.10]                            


Epoch #657: test_reward: -9156.625000 ± 14857.155316, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #658: 128001it [01:48, 1182.31it/s, agent0/loss=79681.518, env_step=84224000, len=512, n/ep=60, n/st=25600, rew=-7011.78]                             


Epoch #658: test_reward: -8327.750000 ± 14406.202166, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #659: 128001it [01:48, 1184.03it/s, agent0/loss=79551.121, env_step=84352000, len=512, n/ep=40, n/st=25600, rew=-6883.09]                            


Epoch #659: test_reward: -5834.700000 ± 12640.397698, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #660: 128001it [01:46, 1197.19it/s, agent0/loss=79390.805, env_step=84480000, len=512, n/ep=60, n/st=25600, rew=-6329.75]                            


Epoch #660: test_reward: -7497.175000 ± 13892.212518, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #661: 128001it [01:48, 1180.46it/s, agent0/loss=79281.582, env_step=84608000, len=512, n/ep=40, n/st=25600, rew=-9732.66]                             


Epoch #661: test_reward: -7495.250000 ± 13893.246238, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #662: 128001it [01:48, 1179.95it/s, agent0/loss=79481.885, env_step=84736000, len=512, n/ep=60, n/st=25600, rew=-8069.93]                            


Epoch #662: test_reward: -9155.175000 ± 14858.046047, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #663: 128001it [01:48, 1182.31it/s, agent0/loss=79626.082, env_step=84864000, len=512, n/ep=40, n/st=25600, rew=-8562.04]                             


Epoch #663: test_reward: -7495.900000 ± 13892.898277, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #664: 128001it [01:48, 1175.52it/s, agent0/loss=79377.959, env_step=84992000, len=512, n/ep=60, n/st=25600, rew=-8507.41]                            


Epoch #664: test_reward: -8326.000000 ± 14407.209492, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #665: 128001it [01:48, 1178.45it/s, agent0/loss=79483.525, env_step=85120000, len=512, n/ep=40, n/st=25600, rew=-10884.51]                            


Epoch #665: test_reward: -9988.775000 ± 15247.694843, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #666: 128001it [01:47, 1190.41it/s, agent0/loss=79776.302, env_step=85248000, len=512, n/ep=60, n/st=25600, rew=-9168.32]                            


Epoch #666: test_reward: -9993.375000 ± 15244.689640, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #667: 128001it [01:45, 1208.01it/s, agent0/loss=79821.355, env_step=85376000, len=512, n/ep=40, n/st=25600, rew=-8938.77]                             


Epoch #667: test_reward: -9162.800000 ± 14853.360686, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #668: 128001it [01:47, 1189.89it/s, agent0/loss=79950.921, env_step=85504000, len=512, n/ep=60, n/st=25600, rew=-8616.80]                            


Epoch #668: test_reward: -5836.725000 ± 12639.466492, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #669: 128001it [01:47, 1188.67it/s, agent0/loss=80027.956, env_step=85632000, len=512, n/ep=40, n/st=25600, rew=-8404.01]                            


Epoch #669: test_reward: -8326.175000 ± 14407.109077, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #670: 128001it [01:46, 1198.24it/s, agent0/loss=79977.905, env_step=85760000, len=512, n/ep=60, n/st=25600, rew=-8962.52]                            


Epoch #670: test_reward: -9991.725000 ± 15245.767588, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #671: 128001it [01:46, 1202.66it/s, agent0/loss=80043.506, env_step=85888000, len=512, n/ep=40, n/st=25600, rew=-8885.62]                            


Epoch #671: test_reward: -9988.700000 ± 15247.743665, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #672: 128001it [01:48, 1179.92it/s, agent0/loss=80188.108, env_step=86016000, len=512, n/ep=60, n/st=25600, rew=-9051.42]                            


Epoch #672: test_reward: -7498.475000 ± 13891.513265, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #673: 128001it [01:47, 1189.90it/s, agent0/loss=80235.443, env_step=86144000, len=512, n/ep=40, n/st=25600, rew=-7996.41]                            


Epoch #673: test_reward: -6665.400000 ± 13307.317988, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #674: 128001it [01:47, 1186.43it/s, agent0/loss=80154.469, env_step=86272000, len=512, n/ep=60, n/st=25600, rew=-11154.42]                            


Epoch #674: test_reward: -10820.525000 ± 15584.383008, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #675: 128001it [01:48, 1176.88it/s, agent0/loss=80139.474, env_step=86400000, len=512, n/ep=40, n/st=25600, rew=-7427.38]                             


Epoch #675: test_reward: -9991.575000 ± 15245.865295, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #676: 128001it [01:49, 1173.15it/s, agent0/loss=80218.072, env_step=86528000, len=512, n/ep=60, n/st=25600, rew=-9752.01]                             


Epoch #676: test_reward: -10821.975000 ± 15583.378802, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #677: 128001it [01:48, 1183.17it/s, agent0/loss=80252.177, env_step=86656000, len=512, n/ep=40, n/st=25600, rew=-6397.82]                             


Epoch #677: test_reward: -11654.300000 ± 15868.935962, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #678: 128001it [01:49, 1172.93it/s, agent0/loss=80140.342, env_step=86784000, len=512, n/ep=60, n/st=25600, rew=-9937.65]                             


Epoch #678: test_reward: -5005.650000 ± 11877.628447, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #679: 128001it [01:47, 1190.58it/s, agent0/loss=80082.977, env_step=86912000, len=512, n/ep=40, n/st=25600, rew=-7453.10]                            


Epoch #679: test_reward: -8324.600000 ± 14408.015744, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #680: 128001it [01:49, 1170.32it/s, agent0/loss=79994.868, env_step=87040000, len=512, n/ep=60, n/st=25600, rew=-9303.67]                             


Epoch #680: test_reward: -9988.325000 ± 15247.988035, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #681: 128001it [01:47, 1187.81it/s, agent0/loss=80039.396, env_step=87168000, len=512, n/ep=40, n/st=25600, rew=-7103.45]                            


Epoch #681: test_reward: -9159.775000 ± 14855.220152, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #682: 128001it [01:47, 1189.84it/s, agent0/loss=80268.923, env_step=87296000, len=512, n/ep=60, n/st=25600, rew=-7528.73]                            


Epoch #682: test_reward: -8328.975000 ± 14405.495952, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #683: 128001it [01:46, 1196.34it/s, agent0/loss=80342.452, env_step=87424000, len=512, n/ep=40, n/st=25600, rew=-8574.90]                             


Epoch #683: test_reward: -6665.275000 ± 13307.380016, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #684: 128001it [01:49, 1171.17it/s, agent0/loss=80322.685, env_step=87552000, len=512, n/ep=60, n/st=25600, rew=-8263.41]                            


Epoch #684: test_reward: -8329.275000 ± 14405.323771, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #685: 128001it [01:47, 1194.76it/s, agent0/loss=80279.293, env_step=87680000, len=512, n/ep=40, n/st=25600, rew=-9371.58]                             


Epoch #685: test_reward: -7498.425000 ± 13891.540071, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #686: 128001it [01:47, 1195.28it/s, agent0/loss=80220.756, env_step=87808000, len=512, n/ep=60, n/st=25600, rew=-9267.26]                             


Epoch #686: test_reward: -6668.225000 ± 13305.908606, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #687: 128001it [01:48, 1177.20it/s, agent0/loss=80092.936, env_step=87936000, len=512, n/ep=40, n/st=25600, rew=-6938.06]                            


Epoch #687: test_reward: -7497.325000 ± 13892.132225, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #688: 128001it [01:48, 1177.89it/s, agent0/loss=80261.658, env_step=88064000, len=512, n/ep=60, n/st=25600, rew=-9197.70]                            


Epoch #688: test_reward: -8332.250000 ± 14403.609433, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #689: 128001it [01:47, 1188.23it/s, agent0/loss=80484.607, env_step=88192000, len=512, n/ep=40, n/st=25600, rew=-7824.85]                            


Epoch #689: test_reward: -9155.125000 ± 14858.076647, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #690: 128001it [01:49, 1173.18it/s, agent0/loss=80455.110, env_step=88320000, len=512, n/ep=60, n/st=25600, rew=-8552.37]                            


Epoch #690: test_reward: -8324.625000 ± 14408.001389, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #691: 128001it [01:48, 1180.46it/s, agent0/loss=80463.466, env_step=88448000, len=512, n/ep=40, n/st=25600, rew=-7675.04]                             


Epoch #691: test_reward: -5004.275000 ± 11878.204656, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #692: 128001it [01:49, 1167.47it/s, agent0/loss=80456.916, env_step=88576000, len=512, n/ep=60, n/st=25600, rew=-9110.95]                            


Epoch #692: test_reward: -8330.475000 ± 14404.631753, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #693: 128001it [01:47, 1194.35it/s, agent0/loss=80193.180, env_step=88704000, len=512, n/ep=40, n/st=25600, rew=-7520.69]                            


Epoch #693: test_reward: -6666.375000 ± 13306.830939, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #694: 128001it [01:48, 1179.26it/s, agent0/loss=80019.539, env_step=88832000, len=512, n/ep=60, n/st=25600, rew=-10418.78]                            


Epoch #694: test_reward: -6663.725000 ± 13308.152648, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #695: 128001it [01:47, 1193.35it/s, agent0/loss=80309.493, env_step=88960000, len=512, n/ep=40, n/st=25600, rew=-9734.91]                             


Epoch #695: test_reward: -9988.300000 ± 15248.004216, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #696: 128001it [01:48, 1177.05it/s, agent0/loss=80536.579, env_step=89088000, len=512, n/ep=60, n/st=25600, rew=-7443.52]                            


Epoch #696: test_reward: -7497.075000 ± 13892.266070, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #697: 128001it [01:47, 1188.00it/s, agent0/loss=80591.834, env_step=89216000, len=512, n/ep=40, n/st=25600, rew=-10101.86]                            


Epoch #697: test_reward: -8326.325000 ± 14407.023017, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #698: 128001it [01:48, 1178.08it/s, agent0/loss=80702.594, env_step=89344000, len=512, n/ep=60, n/st=25600, rew=-7404.88]                            


Epoch #698: test_reward: -4177.825000 ± 10999.620286, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #699: 128001it [01:48, 1180.03it/s, agent0/loss=80745.149, env_step=89472000, len=512, n/ep=40, n/st=25600, rew=-7342.00]                            


Epoch #699: test_reward: -8330.725000 ± 14404.488141, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #700: 128001it [01:49, 1173.66it/s, agent0/loss=80570.954, env_step=89600000, len=512, n/ep=60, n/st=25600, rew=-8531.88]                            


Epoch #700: test_reward: -9164.250000 ± 14852.469043, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #701: 128001it [01:48, 1176.69it/s, agent0/loss=80540.624, env_step=89728000, len=512, n/ep=40, n/st=25600, rew=-8634.06]                            


Epoch #701: test_reward: -5836.150000 ± 12639.731537, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #702: 128001it [01:47, 1188.33it/s, agent0/loss=80722.617, env_step=89856000, len=512, n/ep=60, n/st=25600, rew=-9016.90]                             


Epoch #702: test_reward: -3340.525000 ± 9979.855613, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #703: 128001it [01:48, 1175.54it/s, agent0/loss=80789.703, env_step=89984000, len=512, n/ep=40, n/st=25600, rew=-7125.73]                            


Epoch #703: test_reward: -8332.300000 ± 14403.580604, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #704: 128001it [01:46, 1197.19it/s, agent0/loss=80796.681, env_step=90112000, len=512, n/ep=60, n/st=25600, rew=-8743.67]                             


Epoch #704: test_reward: -8324.600000 ± 14408.015734, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #705: 128001it [01:47, 1194.94it/s, agent0/loss=80792.482, env_step=90240000, len=512, n/ep=40, n/st=25600, rew=-9552.05]                             


Epoch #705: test_reward: -9988.725000 ± 15247.727395, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #706: 128001it [01:48, 1179.77it/s, agent0/loss=80759.246, env_step=90368000, len=512, n/ep=60, n/st=25600, rew=-7858.91]                            


Epoch #706: test_reward: -6667.000000 ± 13306.520317, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #707: 128001it [01:48, 1181.14it/s, agent0/loss=80842.111, env_step=90496000, len=512, n/ep=40, n/st=25600, rew=-8118.05]                            


Epoch #707: test_reward: -5835.000000 ± 12640.260553, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #708: 128001it [01:49, 1167.81it/s, agent0/loss=80934.435, env_step=90624000, len=512, n/ep=60, n/st=25600, rew=-9804.85]                             


Epoch #708: test_reward: -12484.725000 ± 16107.958887, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #709: 128001it [01:47, 1185.56it/s, agent0/loss=80886.521, env_step=90752000, len=512, n/ep=40, n/st=25600, rew=-9632.01]                            


Epoch #709: test_reward: -6661.575000 ± 13309.223260, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #710: 128001it [01:46, 1202.03it/s, agent0/loss=80875.661, env_step=90880000, len=512, n/ep=60, n/st=25600, rew=-8601.27]                             


Epoch #710: test_reward: -6663.350000 ± 13308.338787, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #711: 128001it [01:47, 1187.13it/s, agent0/loss=80915.455, env_step=91008000, len=512, n/ep=40, n/st=25600, rew=-6740.82]                            


Epoch #711: test_reward: -8325.950000 ± 14407.238191, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #712: 128001it [01:49, 1167.66it/s, agent0/loss=81014.636, env_step=91136000, len=512, n/ep=60, n/st=25600, rew=-9761.12]                            


Epoch #712: test_reward: -7492.800000 ± 13894.563518, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #713: 128001it [01:47, 1193.00it/s, agent0/loss=81085.546, env_step=91264000, len=512, n/ep=40, n/st=25600, rew=-7926.36]                            


Epoch #713: test_reward: -10822.075000 ± 15583.309724, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #714: 128001it [01:50, 1161.87it/s, agent0/loss=81079.555, env_step=91392000, len=512, n/ep=60, n/st=25600, rew=-9506.38]                            


Epoch #714: test_reward: -7498.600000 ± 13891.446373, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #715: 128001it [01:48, 1178.93it/s, agent0/loss=81027.789, env_step=91520000, len=512, n/ep=40, n/st=25600, rew=-7811.50]                            


Epoch #715: test_reward: -10823.900000 ± 15582.046343, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #716: 128001it [01:47, 1190.10it/s, agent0/loss=81059.182, env_step=91648000, len=512, n/ep=60, n/st=25600, rew=-7538.47]                            


Epoch #716: test_reward: -7490.625000 ± 13895.730335, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #717: 128001it [01:47, 1196.12it/s, agent0/loss=81127.412, env_step=91776000, len=512, n/ep=40, n/st=25600, rew=-8998.90]                            


Epoch #717: test_reward: -7494.225000 ± 13893.797878, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #718: 128001it [01:47, 1190.08it/s, agent0/loss=81154.158, env_step=91904000, len=512, n/ep=60, n/st=25600, rew=-8688.93]                            


Epoch #718: test_reward: -9988.675000 ± 15247.759943, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #719: 128001it [01:47, 1186.26it/s, agent0/loss=81105.494, env_step=92032000, len=512, n/ep=40, n/st=25600, rew=-8309.86]                             


Epoch #719: test_reward: -5834.575000 ± 12640.454812, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #720: 128001it [01:48, 1177.23it/s, agent0/loss=81065.190, env_step=92160000, len=512, n/ep=60, n/st=25600, rew=-8444.62]                             


Epoch #720: test_reward: -9156.725000 ± 14857.094100, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #721: 128001it [01:45, 1207.68it/s, agent0/loss=81121.963, env_step=92288000, len=512, n/ep=40, n/st=25600, rew=-11010.62]                            


Epoch #721: test_reward: -9162.950000 ± 14853.268751, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #722: 128001it [01:49, 1172.51it/s, agent0/loss=81181.234, env_step=92416000, len=512, n/ep=60, n/st=25600, rew=-8939.77]                            


Epoch #722: test_reward: -9988.700000 ± 15247.743673, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #723: 128001it [01:49, 1169.80it/s, agent0/loss=81198.180, env_step=92544000, len=512, n/ep=40, n/st=25600, rew=-8764.85]                            


Epoch #723: test_reward: -8327.625000 ± 14406.273950, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #724: 128001it [01:47, 1187.29it/s, agent0/loss=81223.140, env_step=92672000, len=512, n/ep=60, n/st=25600, rew=-7773.08]                            


Epoch #724: test_reward: -13315.000000 ± 16301.359324, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #725: 128001it [01:49, 1167.49it/s, agent0/loss=81267.874, env_step=92800000, len=512, n/ep=40, n/st=25600, rew=-8890.62]                            


Epoch #725: test_reward: -8324.275000 ± 14408.202242, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #726: 128001it [01:48, 1175.86it/s, agent0/loss=81345.089, env_step=92928000, len=512, n/ep=60, n/st=25600, rew=-6944.24]                            


Epoch #726: test_reward: -8330.675000 ± 14404.516903, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #727: 128001it [01:48, 1180.96it/s, agent0/loss=81441.709, env_step=93056000, len=512, n/ep=40, n/st=25600, rew=-6927.88]                            


Epoch #727: test_reward: -10822.175000 ± 15583.240669, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #728: 128001it [01:47, 1188.25it/s, agent0/loss=81412.974, env_step=93184000, len=512, n/ep=60, n/st=25600, rew=-7388.23]                             


Epoch #728: test_reward: -9156.675000 ± 14857.124704, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #729: 128001it [01:48, 1178.73it/s, agent0/loss=81359.435, env_step=93312000, len=512, n/ep=40, n/st=25600, rew=-8229.35]                             


Epoch #729: test_reward: -9988.475000 ± 15247.890172, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #730: 128001it [01:46, 1199.63it/s, agent0/loss=81351.722, env_step=93440000, len=512, n/ep=60, n/st=25600, rew=-9840.73]                            


Epoch #730: test_reward: -11652.500000 ± 15870.253769, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #731: 128001it [01:47, 1187.99it/s, agent0/loss=81412.317, env_step=93568000, len=512, n/ep=40, n/st=25600, rew=-9602.56]                            


Epoch #731: test_reward: -7495.725000 ± 13892.991917, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #732: 128001it [01:49, 1172.16it/s, agent0/loss=81478.113, env_step=93696000, len=512, n/ep=60, n/st=25600, rew=-9581.76]                            


Epoch #732: test_reward: -8326.325000 ± 14407.023014, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #733: 128001it [01:47, 1185.22it/s, agent0/loss=81440.660, env_step=93824000, len=512, n/ep=40, n/st=25600, rew=-10239.75]                            


Epoch #733: test_reward: -9154.825000 ± 14858.260361, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #734: 128001it [01:49, 1174.32it/s, agent0/loss=81385.895, env_step=93952000, len=512, n/ep=60, n/st=25600, rew=-9456.10]                             


Epoch #734: test_reward: -9161.450000 ± 14854.190972, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #735: 128001it [01:49, 1169.15it/s, agent0/loss=81341.121, env_step=94080000, len=512, n/ep=40, n/st=25600, rew=-8541.10]                            


Epoch #735: test_reward: -9161.025000 ± 14854.451374, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #736: 128001it [01:46, 1198.42it/s, agent0/loss=81424.619, env_step=94208000, len=512, n/ep=60, n/st=25600, rew=-8002.48]                            


Epoch #736: test_reward: -7498.025000 ± 13891.754332, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #737: 128001it [01:46, 1196.84it/s, agent0/loss=81460.704, env_step=94336000, len=512, n/ep=40, n/st=25600, rew=-8039.02]                            


Epoch #737: test_reward: -9161.500000 ± 14854.160347, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #738: 128001it [01:47, 1191.79it/s, agent0/loss=81476.842, env_step=94464000, len=512, n/ep=60, n/st=25600, rew=-10770.57]                            


Epoch #738: test_reward: -9991.700000 ± 15245.783875, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #739: 128001it [01:47, 1186.33it/s, agent0/loss=81630.519, env_step=94592000, len=512, n/ep=40, n/st=25600, rew=-7947.64]                            


Epoch #739: test_reward: -8326.050000 ± 14407.180775, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #740: 128001it [01:47, 1190.23it/s, agent0/loss=81651.037, env_step=94720000, len=512, n/ep=60, n/st=25600, rew=-9121.96]                             


Epoch #740: test_reward: -8331.075000 ± 14404.287218, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #741: 128001it [01:48, 1176.37it/s, agent0/loss=81659.305, env_step=94848000, len=512, n/ep=40, n/st=25600, rew=-8049.71]                            


Epoch #741: test_reward: -9158.175000 ± 14856.203160, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #742: 128001it [01:48, 1184.26it/s, agent0/loss=81545.186, env_step=94976000, len=512, n/ep=60, n/st=25600, rew=-8409.44]                             


Epoch #742: test_reward: -9156.425000 ± 14857.277762, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #743: 128001it [01:49, 1163.69it/s, agent0/loss=81372.830, env_step=95104000, len=512, n/ep=40, n/st=25600, rew=-8091.29]                            


Epoch #743: test_reward: -8327.750000 ± 14406.202178, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #744: 128001it [01:49, 1172.70it/s, agent0/loss=81481.670, env_step=95232000, len=512, n/ep=60, n/st=25600, rew=-7442.07]                            


Epoch #744: test_reward: -6665.400000 ± 13307.317988, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #745: 128001it [01:48, 1175.87it/s, agent0/loss=81657.851, env_step=95360000, len=512, n/ep=40, n/st=25600, rew=-7675.16]                             


Epoch #745: test_reward: -9159.475000 ± 14855.403975, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #746: 128001it [01:49, 1165.66it/s, agent0/loss=81723.176, env_step=95488000, len=512, n/ep=60, n/st=25600, rew=-7674.66]                             


Epoch #746: test_reward: -7492.700000 ± 13894.616992, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #747: 128001it [01:50, 1161.04it/s, agent0/loss=81725.942, env_step=95616000, len=512, n/ep=40, n/st=25600, rew=-8904.66]                            


Epoch #747: test_reward: -6665.525000 ± 13307.255968, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #748: 128001it [01:49, 1174.17it/s, agent0/loss=81731.262, env_step=95744000, len=512, n/ep=60, n/st=25600, rew=-8023.82]                            


Epoch #748: test_reward: -6671.275000 ± 13304.386666, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #749: 128001it [01:48, 1174.68it/s, agent0/loss=81660.158, env_step=95872000, len=512, n/ep=40, n/st=25600, rew=-6393.85]                            


Epoch #749: test_reward: -8324.525000 ± 14408.058757, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #750: 128001it [01:49, 1170.75it/s, agent0/loss=81526.705, env_step=96000000, len=512, n/ep=60, n/st=25600, rew=-7245.27]                            


Epoch #750: test_reward: -8331.025000 ± 14404.315922, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #751: 128001it [01:49, 1166.80it/s, agent0/loss=81598.211, env_step=96128000, len=512, n/ep=40, n/st=25600, rew=-7233.56]                            


Epoch #751: test_reward: -5832.400000 ± 12641.452220, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #752: 128001it [01:49, 1171.65it/s, agent0/loss=81776.269, env_step=96256000, len=512, n/ep=60, n/st=25600, rew=-9601.64]                            


Epoch #752: test_reward: -9161.275000 ± 14854.298176, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #753: 128001it [01:48, 1176.72it/s, agent0/loss=81890.511, env_step=96384000, len=512, n/ep=40, n/st=25600, rew=-8575.54]                            


Epoch #753: test_reward: -9993.100000 ± 15244.868877, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #754: 128001it [01:48, 1182.15it/s, agent0/loss=81910.600, env_step=96512000, len=512, n/ep=60, n/st=25600, rew=-9356.26]                             


Epoch #754: test_reward: -6664.350000 ± 13307.839638, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #755: 128001it [01:49, 1166.29it/s, agent0/loss=81855.309, env_step=96640000, len=512, n/ep=40, n/st=25600, rew=-9128.14]                             


Epoch #755: test_reward: -7496.200000 ± 13892.735892, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #756: 128001it [01:50, 1160.20it/s, agent0/loss=81839.276, env_step=96768000, len=512, n/ep=60, n/st=25600, rew=-9120.20]                            


Epoch #756: test_reward: -9991.675000 ± 15245.800158, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #757: 128001it [01:49, 1164.65it/s, agent0/loss=81947.350, env_step=96896000, len=512, n/ep=40, n/st=25600, rew=-9009.36]                            


Epoch #757: test_reward: -7495.700000 ± 13893.005312, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #758: 128001it [01:48, 1177.57it/s, agent0/loss=82053.999, env_step=97024000, len=512, n/ep=60, n/st=25600, rew=-9974.61]                            


Epoch #758: test_reward: -5001.875000 ± 11879.207642, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #759: 128001it [01:49, 1166.12it/s, agent0/loss=82034.059, env_step=97152000, len=512, n/ep=40, n/st=25600, rew=-6784.66]                             


Epoch #759: test_reward: -9985.575000 ± 15249.783970, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #760: 128001it [01:49, 1166.96it/s, agent0/loss=82000.293, env_step=97280000, len=512, n/ep=60, n/st=25600, rew=-7904.38]                            


Epoch #760: test_reward: -7491.175000 ± 13895.435896, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #761: 128001it [01:47, 1186.53it/s, agent0/loss=82009.436, env_step=97408000, len=512, n/ep=40, n/st=25600, rew=-9117.54]                            


Epoch #761: test_reward: -9990.150000 ± 15246.796446, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #762: 128001it [01:50, 1161.17it/s, agent0/loss=82029.192, env_step=97536000, len=512, n/ep=60, n/st=25600, rew=-9081.79]                            


Epoch #762: test_reward: -8332.500000 ± 14403.465786, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #763: 128001it [01:48, 1174.36it/s, agent0/loss=81919.178, env_step=97664000, len=512, n/ep=40, n/st=25600, rew=-8698.21]                             


Epoch #763: test_reward: -7498.775000 ± 13891.352647, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #764: 128001it [01:50, 1159.24it/s, agent0/loss=81867.131, env_step=97792000, len=512, n/ep=60, n/st=25600, rew=-9653.55]                             


Epoch #764: test_reward: -11655.925000 ± 15867.745742, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #765: 128001it [01:48, 1175.33it/s, agent0/loss=81929.793, env_step=97920000, len=512, n/ep=40, n/st=25600, rew=-8988.02]                             


Epoch #765: test_reward: -7493.975000 ± 13893.931637, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #766: 128001it [01:47, 1185.57it/s, agent0/loss=82030.830, env_step=98048000, len=512, n/ep=60, n/st=25600, rew=-7005.92]                            


Epoch #766: test_reward: -8327.600000 ± 14406.288238, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #767: 128001it [01:49, 1168.03it/s, agent0/loss=82188.723, env_step=98176000, len=512, n/ep=40, n/st=25600, rew=-7807.93]                             


Epoch #767: test_reward: -8328.850000 ± 14405.567793, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #768: 128001it [01:50, 1160.86it/s, agent0/loss=82294.723, env_step=98304000, len=512, n/ep=60, n/st=25600, rew=-8649.29]                             


Epoch #768: test_reward: -8327.800000 ± 14406.173470, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #769: 128001it [01:49, 1174.16it/s, agent0/loss=82296.702, env_step=98432000, len=512, n/ep=40, n/st=25600, rew=-9711.56]                             


Epoch #769: test_reward: -9158.050000 ± 14856.279792, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #770: 128001it [01:49, 1167.30it/s, agent0/loss=82015.496, env_step=98560000, len=512, n/ep=60, n/st=25600, rew=-8509.73]                            


Epoch #770: test_reward: -9158.200000 ± 14856.187856, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #771: 128001it [01:49, 1165.98it/s, agent0/loss=81965.058, env_step=98688000, len=512, n/ep=40, n/st=25600, rew=-8947.76]                            


Epoch #771: test_reward: -5009.300000 ± 11876.096887, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #772: 128001it [01:49, 1164.00it/s, agent0/loss=82252.876, env_step=98816000, len=512, n/ep=60, n/st=25600, rew=-8042.54]                            


Epoch #772: test_reward: -8324.450000 ± 14408.101792, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #773: 128001it [01:48, 1184.62it/s, agent0/loss=82293.573, env_step=98944000, len=512, n/ep=40, n/st=25600, rew=-7825.36]                            


Epoch #773: test_reward: -10821.900000 ± 15583.430582, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #774: 128001it [01:49, 1172.71it/s, agent0/loss=82233.661, env_step=99072000, len=512, n/ep=60, n/st=25600, rew=-7815.90]                            


Epoch #774: test_reward: -8324.700000 ± 14407.958374, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #775: 128001it [01:48, 1179.76it/s, agent0/loss=82290.659, env_step=99200000, len=512, n/ep=40, n/st=25600, rew=-8278.94]                            


Epoch #775: test_reward: -8326.325000 ± 14407.023012, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #776: 128001it [01:51, 1151.39it/s, agent0/loss=82401.232, env_step=99328000, len=512, n/ep=60, n/st=25600, rew=-8786.62]                            


Epoch #776: test_reward: -9990.125000 ± 15246.812711, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #777: 128001it [01:47, 1186.25it/s, agent0/loss=82498.685, env_step=99456000, len=512, n/ep=40, n/st=25600, rew=-7643.57]                            


Epoch #777: test_reward: -7492.750000 ± 13894.590254, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #778: 128001it [01:48, 1177.72it/s, agent0/loss=82557.862, env_step=99584000, len=512, n/ep=60, n/st=25600, rew=-7854.76]                             


Epoch #778: test_reward: -6669.925000 ± 13305.060812, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #779: 128001it [01:49, 1166.16it/s, agent0/loss=82544.427, env_step=99712000, len=512, n/ep=40, n/st=25600, rew=-9629.38]                            


Epoch #779: test_reward: -12481.600000 ± 16110.374417, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #780: 128001it [01:49, 1170.18it/s, agent0/loss=82414.863, env_step=99840000, len=512, n/ep=60, n/st=25600, rew=-8244.43]                             


Epoch #780: test_reward: -3337.250000 ± 9980.940514, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #781: 128001it [01:49, 1167.63it/s, agent0/loss=82334.991, env_step=99968000, len=512, n/ep=40, n/st=25600, rew=-8304.00]                            


Epoch #781: test_reward: -9161.175000 ± 14854.359453, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #782: 128001it [01:49, 1168.63it/s, agent0/loss=82391.180, env_step=100096000, len=512, n/ep=60, n/st=25600, rew=-9067.24]                             


Epoch #782: test_reward: -9155.000000 ± 14858.153164, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #783: 128001it [01:48, 1177.21it/s, agent0/loss=82545.445, env_step=100224000, len=512, n/ep=40, n/st=25600, rew=-8445.31]                            


Epoch #783: test_reward: -11652.675000 ± 15870.125934, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #784: 128001it [01:49, 1166.06it/s, agent0/loss=82648.920, env_step=100352000, len=512, n/ep=60, n/st=25600, rew=-9848.44]                            


Epoch #784: test_reward: -6667.575000 ± 13306.231506, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #785: 128001it [01:51, 1151.28it/s, agent0/loss=82560.219, env_step=100480000, len=512, n/ep=40, n/st=25600, rew=-8637.24]                             


Epoch #785: test_reward: -9161.275000 ± 14854.298181, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #786: 128001it [01:50, 1161.26it/s, agent0/loss=82421.154, env_step=100608000, len=512, n/ep=60, n/st=25600, rew=-8665.14]                            


Epoch #786: test_reward: -10820.625000 ± 15584.313929, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #787: 128001it [01:44, 1221.28it/s, agent0/loss=82457.797, env_step=100736000, len=512, n/ep=40, n/st=25600, rew=-6752.23]                            


Epoch #787: test_reward: -9161.350000 ± 14854.252224, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #788: 128001it [01:44, 1220.35it/s, agent0/loss=82558.592, env_step=100864000, len=512, n/ep=60, n/st=25600, rew=-6693.24]                            


Epoch #788: test_reward: -11654.200000 ± 15869.009015, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #789: 128001it [01:52, 1141.51it/s, agent0/loss=82549.172, env_step=100992000, len=512, n/ep=40, n/st=25600, rew=-8663.38]                            


Epoch #789: test_reward: -5833.450000 ± 12640.972136, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #790: 128001it [01:50, 1160.49it/s, agent0/loss=82522.972, env_step=101120000, len=512, n/ep=60, n/st=25600, rew=-7464.20]                            


Epoch #790: test_reward: -7498.525000 ± 13891.486614, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #791: 128001it [01:37, 1314.72it/s, agent0/loss=82554.129, env_step=101248000, len=512, n/ep=40, n/st=25600, rew=-9827.70]                            


Epoch #791: test_reward: -9156.700000 ± 14857.109401, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #792: 128001it [01:36, 1321.51it/s, agent0/loss=82659.784, env_step=101376000, len=512, n/ep=60, n/st=25600, rew=-8429.32]                            


Epoch #792: test_reward: -9987.100000 ± 15248.788338, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #793: 128001it [01:36, 1324.08it/s, agent0/loss=82759.686, env_step=101504000, len=512, n/ep=40, n/st=25600, rew=-6464.66]                            


Epoch #793: test_reward: -10819.825000 ± 15584.866765, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #794: 128001it [01:38, 1295.95it/s, agent0/loss=82758.928, env_step=101632000, len=512, n/ep=60, n/st=25600, rew=-8974.50]                            


Epoch #794: test_reward: -9158.000000 ± 14856.310365, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #795: 128001it [01:39, 1283.79it/s, agent0/loss=82785.048, env_step=101760000, len=512, n/ep=40, n/st=25600, rew=-12176.01]                            


Epoch #795: test_reward: -9156.575000 ± 14857.185931, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #796: 128001it [01:40, 1269.50it/s, agent0/loss=82835.844, env_step=101888000, len=512, n/ep=60, n/st=25600, rew=-10598.51]                            


Epoch #796: test_reward: -9159.725000 ± 14855.250792, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #797: 128001it [01:39, 1281.82it/s, agent0/loss=82876.852, env_step=102016000, len=512, n/ep=40, n/st=25600, rew=-11393.45]                            


Epoch #797: test_reward: -10822.250000 ± 15583.188881, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #798: 128001it [01:40, 1277.02it/s, agent0/loss=82955.162, env_step=102144000, len=512, n/ep=60, n/st=25600, rew=-7385.52]                            


Epoch #798: test_reward: -11648.000000 ± 15873.548312, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #799: 128001it [01:39, 1287.66it/s, agent0/loss=82948.337, env_step=102272000, len=512, n/ep=40, n/st=25600, rew=-9677.62]                            


Epoch #799: test_reward: -6662.050000 ± 13308.987048, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #800: 128001it [01:39, 1292.40it/s, agent0/loss=82888.776, env_step=102400000, len=512, n/ep=60, n/st=25600, rew=-9308.96]                            


Epoch #800: test_reward: -5831.675000 ± 12641.786198, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #801: 128001it [01:39, 1289.33it/s, agent0/loss=82869.969, env_step=102528000, len=512, n/ep=40, n/st=25600, rew=-8606.42]                            


Epoch #801: test_reward: -9991.925000 ± 15245.637319, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #802: 128001it [01:39, 1287.88it/s, agent0/loss=82906.208, env_step=102656000, len=512, n/ep=60, n/st=25600, rew=-9714.08]                            


Epoch #802: test_reward: -9158.200000 ± 14856.187853, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #803: 128001it [01:40, 1278.26it/s, agent0/loss=82927.635, env_step=102784000, len=512, n/ep=40, n/st=25600, rew=-7643.04]                            


Epoch #803: test_reward: -9156.150000 ± 14857.446252, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #804: 128001it [01:39, 1286.43it/s, agent0/loss=82975.265, env_step=102912000, len=512, n/ep=60, n/st=25600, rew=-9033.08]                            


Epoch #804: test_reward: -7500.150000 ± 13890.613089, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #805: 128001it [01:40, 1279.40it/s, agent0/loss=83040.883, env_step=103040000, len=512, n/ep=40, n/st=25600, rew=-7724.66]                             


Epoch #805: test_reward: -9989.925000 ± 15246.942973, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #806: 128001it [01:39, 1282.83it/s, agent0/loss=83104.632, env_step=103168000, len=512, n/ep=60, n/st=25600, rew=-9618.42]                            


Epoch #806: test_reward: -9993.350000 ± 15244.705948, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #807: 128001it [01:38, 1297.47it/s, agent0/loss=83216.221, env_step=103296000, len=512, n/ep=40, n/st=25600, rew=-6445.93]                             


Epoch #807: test_reward: -8324.350000 ± 14408.159231, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #808: 128001it [01:40, 1278.03it/s, agent0/loss=83252.373, env_step=103424000, len=512, n/ep=60, n/st=25600, rew=-8783.85]                             


Epoch #808: test_reward: -8333.175000 ± 14403.075234, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #809: 128001it [01:40, 1277.84it/s, agent0/loss=83134.770, env_step=103552000, len=512, n/ep=40, n/st=25600, rew=-7273.57]                            


Epoch #809: test_reward: -9159.725000 ± 14855.250779, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #810: 128001it [01:38, 1294.81it/s, agent0/loss=83070.302, env_step=103680000, len=512, n/ep=60, n/st=25600, rew=-7220.08]                            


Epoch #810: test_reward: -7498.450000 ± 13891.526710, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #811: 128001it [01:39, 1288.86it/s, agent0/loss=83079.103, env_step=103808000, len=512, n/ep=40, n/st=25600, rew=-9981.17]                            


Epoch #811: test_reward: -9986.950000 ± 15248.886010, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #812: 128001it [01:39, 1280.26it/s, agent0/loss=82991.655, env_step=103936000, len=512, n/ep=60, n/st=25600, rew=-7586.12]                             


Epoch #812: test_reward: -10822.250000 ± 15583.188881, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #813: 128001it [01:39, 1289.51it/s, agent0/loss=82913.249, env_step=104064000, len=512, n/ep=40, n/st=25600, rew=-8395.27]                            


Epoch #813: test_reward: -5839.425000 ± 12638.227259, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #814: 128001it [01:38, 1299.11it/s, agent0/loss=83089.524, env_step=104192000, len=512, n/ep=60, n/st=25600, rew=-8207.27]                            


Epoch #814: test_reward: -7492.650000 ± 13894.643739, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #815: 128001it [01:39, 1292.56it/s, agent0/loss=83280.727, env_step=104320000, len=512, n/ep=40, n/st=25600, rew=-8747.62]                            


Epoch #815: test_reward: -5836.150000 ± 12639.731507, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #816: 128001it [01:39, 1284.24it/s, agent0/loss=83308.725, env_step=104448000, len=512, n/ep=60, n/st=25600, rew=-9183.50]                            


Epoch #816: test_reward: -8330.850000 ± 14404.416381, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #817: 128001it [01:39, 1280.24it/s, agent0/loss=83335.418, env_step=104576000, len=512, n/ep=40, n/st=25600, rew=-7719.90]                            


Epoch #817: test_reward: -6666.975000 ± 13306.532712, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #818: 128001it [01:39, 1282.00it/s, agent0/loss=83393.456, env_step=104704000, len=512, n/ep=60, n/st=25600, rew=-7543.38]                             


Epoch #818: test_reward: -9161.150000 ± 14854.374787, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #819: 128001it [01:38, 1297.68it/s, agent0/loss=83385.658, env_step=104832000, len=512, n/ep=40, n/st=25600, rew=-8247.33]                             


Epoch #819: test_reward: -5009.025000 ± 11876.215040, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #820: 128001it [01:39, 1290.08it/s, agent0/loss=83195.080, env_step=104960000, len=512, n/ep=60, n/st=25600, rew=-7493.80]                            


Epoch #820: test_reward: -9993.275000 ± 15244.754800, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #821: 128001it [01:39, 1284.65it/s, agent0/loss=83177.713, env_step=105088000, len=512, n/ep=40, n/st=25600, rew=-8887.55]                            


Epoch #821: test_reward: -9156.650000 ± 14857.140008, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #822: 128001it [01:39, 1281.39it/s, agent0/loss=83313.499, env_step=105216000, len=512, n/ep=60, n/st=25600, rew=-9472.37]                             


Epoch #822: test_reward: -9159.750000 ± 14855.235496, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #823: 128001it [01:39, 1284.10it/s, agent0/loss=83333.437, env_step=105344000, len=512, n/ep=40, n/st=25600, rew=-7353.76]                            


Epoch #823: test_reward: -9159.650000 ± 14855.296738, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #824: 128001it [01:38, 1293.60it/s, agent0/loss=83376.117, env_step=105472000, len=512, n/ep=60, n/st=25600, rew=-10042.68]                            


Epoch #824: test_reward: -10820.425000 ± 15584.451999, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #825: 128001it [01:39, 1289.14it/s, agent0/loss=83419.499, env_step=105600000, len=512, n/ep=40, n/st=25600, rew=-9049.19]                            


Epoch #825: test_reward: -12482.475000 ± 16109.697418, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #826: 128001it [01:41, 1262.47it/s, agent0/loss=83485.048, env_step=105728000, len=512, n/ep=60, n/st=25600, rew=-8555.12]                            


Epoch #826: test_reward: -9987.150000 ± 15248.755796, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #827: 128001it [01:41, 1261.75it/s, agent0/loss=83519.922, env_step=105856000, len=512, n/ep=40, n/st=25600, rew=-11890.61]                            


Epoch #827: test_reward: -8325.925000 ± 14407.252581, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #828: 128001it [01:41, 1262.90it/s, agent0/loss=83376.134, env_step=105984000, len=512, n/ep=60, n/st=25600, rew=-7841.04]                             


Epoch #828: test_reward: -9987.125000 ± 15248.772067, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #829: 128001it [01:41, 1263.69it/s, agent0/loss=83248.988, env_step=106112000, len=512, n/ep=40, n/st=25600, rew=-8946.15]                            


Epoch #829: test_reward: -8324.700000 ± 14407.958374, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #830: 128001it [01:40, 1269.62it/s, agent0/loss=83383.359, env_step=106240000, len=512, n/ep=60, n/st=25600, rew=-8516.40]                            


Epoch #830: test_reward: -6663.950000 ± 13308.041030, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #831: 128001it [01:40, 1274.97it/s, agent0/loss=83581.455, env_step=106368000, len=512, n/ep=40, n/st=25600, rew=-7708.95]                            


Epoch #831: test_reward: -9990.375000 ± 15246.649920, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #832: 128001it [01:40, 1273.47it/s, agent0/loss=83608.666, env_step=106496000, len=512, n/ep=60, n/st=25600, rew=-8077.05]                            


Epoch #832: test_reward: -10817.575000 ± 15586.425195, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #833: 128001it [01:39, 1285.64it/s, agent0/loss=83502.820, env_step=106624000, len=512, n/ep=40, n/st=25600, rew=-7027.70]                            


Epoch #833: test_reward: -8327.675000 ± 14406.245204, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #834: 128001it [01:40, 1277.20it/s, agent0/loss=83443.420, env_step=106752000, len=512, n/ep=60, n/st=25600, rew=-7080.77]                            


Epoch #834: test_reward: -5002.000000 ± 11879.155446, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #835: 128001it [01:39, 1284.42it/s, agent0/loss=83457.226, env_step=106880000, len=512, n/ep=40, n/st=25600, rew=-6408.59]                            


Epoch #835: test_reward: -8323.200000 ± 14408.821831, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #836: 128001it [01:39, 1280.83it/s, agent0/loss=83126.793, env_step=107008000, len=512, n/ep=60, n/st=25600, rew=-8537.76]                            


Epoch #836: test_reward: -6665.250000 ± 13307.392436, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #837: 128001it [01:39, 1282.15it/s, agent0/loss=82883.230, env_step=107136000, len=512, n/ep=40, n/st=25600, rew=-9912.96]                            


Epoch #837: test_reward: -7494.025000 ± 13893.904882, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #838: 128001it [01:40, 1275.91it/s, agent0/loss=83259.605, env_step=107264000, len=512, n/ep=60, n/st=25600, rew=-8780.53]                            


Epoch #838: test_reward: -6667.725000 ± 13306.157146, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #839: 128001it [01:53, 1124.12it/s, agent0/loss=83619.649, env_step=107392000, len=512, n/ep=40, n/st=25600, rew=-8363.30]                            


Epoch #839: test_reward: -5831.900000 ± 12641.683505, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #840: 128001it [02:14, 954.58it/s, agent0/loss=83775.649, env_step=107520000, len=512, n/ep=60, n/st=25600, rew=-8498.88]                             


Epoch #840: test_reward: -8324.725000 ± 14407.944038, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #841: 128001it [02:13, 956.39it/s, agent0/loss=83931.461, env_step=107648000, len=512, n/ep=40, n/st=25600, rew=-9991.69]                             


Epoch #841: test_reward: -8329.475000 ± 14405.208947, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #842: 128001it [02:05, 1023.28it/s, agent0/loss=83949.999, env_step=107776000, len=512, n/ep=60, n/st=25600, rew=-10727.67]                            


Epoch #842: test_reward: -6666.825000 ± 13306.607184, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #843: 128001it [02:06, 1008.90it/s, agent0/loss=83807.642, env_step=107904000, len=512, n/ep=40, n/st=25600, rew=-8709.62]                             


Epoch #843: test_reward: -11653.950000 ± 15869.191761, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #844: 128001it [02:08, 996.41it/s, agent0/loss=83678.091, env_step=108032000, len=512, n/ep=60, n/st=25600, rew=-9719.33]                             


Epoch #844: test_reward: -11652.750000 ± 15870.071149, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #845: 128001it [02:04, 1024.38it/s, agent0/loss=83781.034, env_step=108160000, len=512, n/ep=40, n/st=25600, rew=-8177.43]                            


Epoch #845: test_reward: -9988.625000 ± 15247.792494, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #846: 128001it [03:17, 648.52it/s, agent0/loss=83964.649, env_step=108288000, len=512, n/ep=60, n/st=25600, rew=-6651.94]                             


Epoch #846: test_reward: -6666.425000 ± 13306.805847, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #847: 128001it [06:12, 343.51it/s, agent0/loss=83996.589, env_step=108416000, len=512, n/ep=40, n/st=25600, rew=-6486.41]                             


Epoch #847: test_reward: -8329.300000 ± 14405.309379, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #848: 128001it [05:41, 375.35it/s, agent0/loss=83935.595, env_step=108544000, len=512, n/ep=60, n/st=25600, rew=-8477.23]                             


Epoch #848: test_reward: -9158.200000 ± 14856.187844, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #849: 128001it [06:05, 350.05it/s, agent0/loss=83954.430, env_step=108672000, len=512, n/ep=40, n/st=25600, rew=-8621.69]                             


Epoch #849: test_reward: -10820.725000 ± 15584.244871, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #850: 128001it [06:30, 327.86it/s, agent0/loss=83902.538, env_step=108800000, len=512, n/ep=60, n/st=25600, rew=-8414.67]                             


Epoch #850: test_reward: -13315.200000 ± 16301.196697, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #851: 128001it [06:02, 353.59it/s, agent0/loss=83860.339, env_step=108928000, len=512, n/ep=40, n/st=25600, rew=-6913.96]                            


Epoch #851: test_reward: -5829.900000 ± 12642.599999, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #852: 128001it [05:49, 366.17it/s, agent0/loss=84005.605, env_step=109056000, len=512, n/ep=60, n/st=25600, rew=-8365.48]                            


Epoch #852: test_reward: -8332.350000 ± 14403.551896, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #853: 128001it [04:45, 448.34it/s, agent0/loss=84084.835, env_step=109184000, len=512, n/ep=40, n/st=25600, rew=-9741.56]                             


Epoch #853: test_reward: -8330.800000 ± 14404.445156, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #854: 128001it [04:17, 497.80it/s, agent0/loss=84109.619, env_step=109312000, len=512, n/ep=60, n/st=25600, rew=-9053.76]                            


Epoch #854: test_reward: -10820.475000 ± 15584.417495, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #855: 128001it [04:16, 499.12it/s, agent0/loss=84180.396, env_step=109440000, len=512, n/ep=40, n/st=25600, rew=-6249.40]                            


Epoch #855: test_reward: -11654.150000 ± 15869.045530, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #856: 128001it [04:20, 491.08it/s, agent0/loss=84254.677, env_step=109568000, len=512, n/ep=60, n/st=25600, rew=-7302.32]                            


Epoch #856: test_reward: -9988.525000 ± 15247.857600, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #857: 128001it [04:22, 487.44it/s, agent0/loss=84272.479, env_step=109696000, len=512, n/ep=40, n/st=25600, rew=-9138.00]                            


Epoch #857: test_reward: -6663.675000 ± 13308.177466, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #858: 128001it [04:26, 480.38it/s, agent0/loss=84176.181, env_step=109824000, len=512, n/ep=60, n/st=25600, rew=-8096.81]                             


Epoch #858: test_reward: -5834.200000 ± 12640.626330, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #859: 128001it [04:20, 490.48it/s, agent0/loss=83965.326, env_step=109952000, len=512, n/ep=40, n/st=25600, rew=-7476.48]                             


Epoch #859: test_reward: -9991.225000 ± 15246.093522, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #860: 128001it [04:22, 486.80it/s, agent0/loss=84027.958, env_step=110080000, len=512, n/ep=60, n/st=25600, rew=-9561.33]                            


Epoch #860: test_reward: -9158.325000 ± 14856.111312, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #861: 128001it [04:21, 489.36it/s, agent0/loss=84251.303, env_step=110208000, len=512, n/ep=40, n/st=25600, rew=-9343.81]                            


Epoch #861: test_reward: -9156.725000 ± 14857.094100, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #862: 128001it [04:16, 498.36it/s, agent0/loss=84303.439, env_step=110336000, len=512, n/ep=60, n/st=25600, rew=-9107.46]                            


Epoch #862: test_reward: -9988.500000 ± 15247.873898, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #863: 128001it [04:17, 496.49it/s, agent0/loss=84246.635, env_step=110464000, len=512, n/ep=40, n/st=25600, rew=-11597.75]                            


Epoch #863: test_reward: -9158.775000 ± 14855.834030, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #864: 128001it [04:18, 495.15it/s, agent0/loss=84263.300, env_step=110592000, len=512, n/ep=60, n/st=25600, rew=-8101.86]                             


Epoch #864: test_reward: -7498.775000 ± 13891.352626, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #865: 128001it [04:18, 495.86it/s, agent0/loss=84332.593, env_step=110720000, len=512, n/ep=40, n/st=25600, rew=-8510.59]                            


Epoch #865: test_reward: -9156.650000 ± 14857.140005, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #866: 128001it [04:22, 486.73it/s, agent0/loss=84311.606, env_step=110848000, len=512, n/ep=60, n/st=25600, rew=-9094.57]                            


Epoch #866: test_reward: -6663.750000 ± 13308.140236, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #867: 128001it [04:24, 484.72it/s, agent0/loss=84287.659, env_step=110976000, len=512, n/ep=40, n/st=25600, rew=-8086.98]                            


Epoch #867: test_reward: -7492.700000 ± 13894.616992, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #868: 128001it [04:23, 485.63it/s, agent0/loss=84315.363, env_step=111104000, len=512, n/ep=60, n/st=25600, rew=-7246.98]                            


Epoch #868: test_reward: -9156.525000 ± 14857.216563, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #869: 128001it [04:20, 490.61it/s, agent0/loss=84423.931, env_step=111232000, len=512, n/ep=40, n/st=25600, rew=-8947.61]                            


Epoch #869: test_reward: -10826.350000 ± 15580.348070, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #870: 128001it [04:20, 491.78it/s, agent0/loss=84447.867, env_step=111360000, len=512, n/ep=60, n/st=25600, rew=-8785.58]                            


Epoch #870: test_reward: -7498.850000 ± 13891.312479, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #871: 128001it [04:21, 489.51it/s, agent0/loss=84492.253, env_step=111488000, len=512, n/ep=40, n/st=25600, rew=-10105.35]                            


Epoch #871: test_reward: -8324.550000 ± 14408.044425, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #872: 128001it [04:20, 491.34it/s, agent0/loss=84595.578, env_step=111616000, len=512, n/ep=60, n/st=25600, rew=-7812.16]                            


Epoch #872: test_reward: -6666.675000 ± 13306.681651, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #873: 128001it [04:24, 484.54it/s, agent0/loss=84501.218, env_step=111744000, len=512, n/ep=40, n/st=25600, rew=-7471.02]                             


Epoch #873: test_reward: -9987.150000 ± 15248.755796, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #874: 128001it [04:15, 500.08it/s, agent0/loss=84392.424, env_step=111872000, len=512, n/ep=60, n/st=25600, rew=-9231.80]                             


Epoch #874: test_reward: -8326.100000 ± 14407.152095, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #875: 128001it [04:15, 500.33it/s, agent0/loss=84362.979, env_step=112000000, len=512, n/ep=40, n/st=25600, rew=-7186.99]                             


Epoch #875: test_reward: -9153.550000 ± 14859.043660, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #876: 128001it [04:15, 500.96it/s, agent0/loss=84354.481, env_step=112128000, len=512, n/ep=60, n/st=25600, rew=-9155.83]                            


Epoch #876: test_reward: -6668.100000 ± 13305.970782, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #877: 128001it [04:15, 500.22it/s, agent0/loss=84462.562, env_step=112256000, len=512, n/ep=40, n/st=25600, rew=-6689.98]                            


Epoch #877: test_reward: -10825.450000 ± 15580.972620, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #878: 128001it [04:15, 500.21it/s, agent0/loss=84481.251, env_step=112384000, len=512, n/ep=60, n/st=25600, rew=-7366.23]                            


Epoch #878: test_reward: -10820.325000 ± 15584.521241, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #879: 128001it [04:16, 499.57it/s, agent0/loss=84474.070, env_step=112512000, len=512, n/ep=40, n/st=25600, rew=-7887.18]                             


Epoch #879: test_reward: -8324.450000 ± 14408.101847, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #880: 128001it [04:15, 500.57it/s, agent0/loss=84611.868, env_step=112640000, len=512, n/ep=60, n/st=25600, rew=-9577.96]                             


Epoch #880: test_reward: -9162.675000 ± 14853.437268, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #881: 128001it [04:16, 499.67it/s, agent0/loss=84749.019, env_step=112768000, len=512, n/ep=40, n/st=25600, rew=-8660.91]                            


Epoch #881: test_reward: -9164.175000 ± 14852.514879, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #882: 128001it [04:30, 473.74it/s, agent0/loss=84786.180, env_step=112896000, len=512, n/ep=60, n/st=25600, rew=-7240.07]                            


Epoch #882: test_reward: -6663.825000 ± 13308.103037, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #883: 128001it [04:30, 473.72it/s, agent0/loss=84766.941, env_step=113024000, len=512, n/ep=40, n/st=25600, rew=-7363.43]                             


Epoch #883: test_reward: -9160.475000 ± 14854.788849, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #884: 128001it [04:17, 496.14it/s, agent0/loss=84751.699, env_step=113152000, len=512, n/ep=60, n/st=25600, rew=-7311.23]                             


Epoch #884: test_reward: -6666.625000 ± 13306.706572, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #885: 128001it [04:18, 495.19it/s, agent0/loss=84740.368, env_step=113280000, len=512, n/ep=40, n/st=25600, rew=-7523.80]                            


Epoch #885: test_reward: -9990.000000 ± 15246.894139, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #886: 128001it [04:19, 492.85it/s, agent0/loss=84780.655, env_step=113408000, len=512, n/ep=60, n/st=25600, rew=-10405.09]                            


Epoch #886: test_reward: -6662.250000 ± 13308.887846, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #887: 128001it [04:23, 485.85it/s, agent0/loss=84860.038, env_step=113536000, len=512, n/ep=40, n/st=25600, rew=-8492.83]                            


Epoch #887: test_reward: -10825.450000 ± 15580.972625, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #888: 128001it [04:22, 488.48it/s, agent0/loss=84946.692, env_step=113664000, len=512, n/ep=60, n/st=25600, rew=-8993.28]                             


Epoch #888: test_reward: -9159.800000 ± 14855.204837, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #889: 128001it [04:21, 490.33it/s, agent0/loss=84849.213, env_step=113792000, len=512, n/ep=40, n/st=25600, rew=-8583.60]                             


Epoch #889: test_reward: -5009.100000 ± 11876.183783, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #890: 128001it [04:24, 483.71it/s, agent0/loss=84725.380, env_step=113920000, len=512, n/ep=60, n/st=25600, rew=-7240.64]                            


Epoch #890: test_reward: -8324.425000 ± 14408.116147, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #891: 128001it [04:21, 489.77it/s, agent0/loss=84711.330, env_step=114048000, len=512, n/ep=40, n/st=25600, rew=-9338.21]                            


Epoch #891: test_reward: -9159.800000 ± 14855.204842, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #892: 128001it [04:12, 507.46it/s, agent0/loss=84665.978, env_step=114176000, len=512, n/ep=60, n/st=25600, rew=-10377.02]                            


Epoch #892: test_reward: -6663.650000 ± 13308.189854, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #893: 128001it [04:13, 505.71it/s, agent0/loss=84765.908, env_step=114304000, len=512, n/ep=40, n/st=25600, rew=-8720.75]                            


Epoch #893: test_reward: -10820.575000 ± 15584.348429, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #894: 128001it [04:15, 500.03it/s, agent0/loss=84822.995, env_step=114432000, len=512, n/ep=60, n/st=25600, rew=-7615.33]                            


Epoch #894: test_reward: -8327.600000 ± 14406.288259, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #895: 128001it [04:15, 501.04it/s, agent0/loss=84901.194, env_step=114560000, len=512, n/ep=40, n/st=25600, rew=-9569.88]                            


Epoch #895: test_reward: -8330.775000 ± 14404.459489, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #896: 128001it [04:15, 501.31it/s, agent0/loss=85118.943, env_step=114688000, len=512, n/ep=60, n/st=25600, rew=-9140.13]                            


Epoch #896: test_reward: -5834.950000 ± 12640.283385, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #897: 128001it [04:15, 501.57it/s, agent0/loss=85064.729, env_step=114816000, len=512, n/ep=40, n/st=25600, rew=-6336.18]                             


Epoch #897: test_reward: -9159.800000 ± 14855.204846, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #898: 128001it [04:15, 500.84it/s, agent0/loss=84853.703, env_step=114944000, len=512, n/ep=60, n/st=25600, rew=-10571.80]                            


Epoch #898: test_reward: -6663.550000 ± 13308.239525, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #899: 128001it [04:15, 501.65it/s, agent0/loss=84658.129, env_step=115072000, len=512, n/ep=40, n/st=25600, rew=-9885.02]                            


Epoch #899: test_reward: -9162.950000 ± 14853.268761, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #900: 128001it [04:15, 501.29it/s, agent0/loss=84653.359, env_step=115200000, len=512, n/ep=60, n/st=25600, rew=-8456.08]                            


Epoch #900: test_reward: -9161.375000 ± 14854.236919, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #901: 128001it [04:15, 501.88it/s, agent0/loss=84889.996, env_step=115328000, len=512, n/ep=40, n/st=25600, rew=-8190.93]                            


Epoch #901: test_reward: -8326.075000 ± 14407.166438, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #902: 128001it [04:14, 502.41it/s, agent0/loss=85082.561, env_step=115456000, len=512, n/ep=60, n/st=25600, rew=-7785.48]                            


Epoch #902: test_reward: -10822.275000 ± 15583.171616, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #903: 128001it [04:14, 502.88it/s, agent0/loss=85168.696, env_step=115584000, len=512, n/ep=40, n/st=25600, rew=-7502.86]                            


Epoch #903: test_reward: -5003.750000 ± 11878.423394, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #904: 128001it [04:15, 501.68it/s, agent0/loss=85209.581, env_step=115712000, len=512, n/ep=60, n/st=25600, rew=-7951.73]                            


Epoch #904: test_reward: -7492.475000 ± 13894.737356, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #905: 128001it [04:14, 502.14it/s, agent0/loss=85217.807, env_step=115840000, len=512, n/ep=40, n/st=25600, rew=-10564.45]                            


Epoch #905: test_reward: -9992.700000 ± 15245.129659, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #906: 128001it [04:15, 500.87it/s, agent0/loss=85179.046, env_step=115968000, len=512, n/ep=60, n/st=25600, rew=-9265.92]                            


Epoch #906: test_reward: -9990.325000 ± 15246.682474, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #907: 128001it [04:15, 501.05it/s, agent0/loss=85225.582, env_step=116096000, len=512, n/ep=40, n/st=25600, rew=-8460.33]                            


Epoch #907: test_reward: -8327.675000 ± 14406.245200, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #908: 128001it [04:15, 501.72it/s, agent0/loss=85307.156, env_step=116224000, len=512, n/ep=60, n/st=25600, rew=-6429.01]                            


Epoch #908: test_reward: -9155.100000 ± 14858.091945, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #909: 128001it [04:14, 502.51it/s, agent0/loss=85249.942, env_step=116352000, len=512, n/ep=40, n/st=25600, rew=-11765.25]                            


Epoch #909: test_reward: -8326.150000 ± 14407.123395, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #910: 128001it [04:14, 502.42it/s, agent0/loss=85295.035, env_step=116480000, len=512, n/ep=60, n/st=25600, rew=-9381.35]                             


Epoch #910: test_reward: -6663.625000 ± 13308.202277, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #911: 128001it [04:14, 502.28it/s, agent0/loss=85372.008, env_step=116608000, len=512, n/ep=40, n/st=25600, rew=-9805.64]                             


Epoch #911: test_reward: -6667.000000 ± 13306.520308, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #912: 128001it [04:15, 500.96it/s, agent0/loss=85285.365, env_step=116736000, len=512, n/ep=60, n/st=25600, rew=-9762.07]                            


Epoch #912: test_reward: -8323.175000 ± 14408.836162, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #913: 128001it [04:14, 502.21it/s, agent0/loss=85176.089, env_step=116864000, len=512, n/ep=40, n/st=25600, rew=-8847.90]                             


Epoch #913: test_reward: -5831.875000 ± 12641.694926, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #914: 128001it [04:14, 503.27it/s, agent0/loss=85245.759, env_step=116992000, len=512, n/ep=60, n/st=25600, rew=-8446.53]                            


Epoch #914: test_reward: -9162.650000 ± 14853.452595, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #915: 128001it [04:15, 500.56it/s, agent0/loss=85412.281, env_step=117120000, len=512, n/ep=40, n/st=25600, rew=-7291.71]                            


Epoch #915: test_reward: -5002.750000 ± 11878.843125, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #916: 128001it [04:15, 501.81it/s, agent0/loss=85427.442, env_step=117248000, len=512, n/ep=60, n/st=25600, rew=-9696.33]                            


Epoch #916: test_reward: -8327.650000 ± 14406.259644, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #917: 128001it [04:15, 501.94it/s, agent0/loss=85489.943, env_step=117376000, len=512, n/ep=40, n/st=25600, rew=-10489.81]                            


Epoch #917: test_reward: -4168.850000 ± 11003.000444, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #918: 128001it [04:15, 500.80it/s, agent0/loss=85177.193, env_step=117504000, len=512, n/ep=60, n/st=25600, rew=-9887.27]                            


Epoch #918: test_reward: -6669.575000 ± 13305.234799, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #919: 128001it [04:15, 501.13it/s, agent0/loss=84777.264, env_step=117632000, len=512, n/ep=40, n/st=25600, rew=-8014.66]                            


Epoch #919: test_reward: -7498.800000 ± 13891.339255, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #920: 128001it [04:15, 501.77it/s, agent0/loss=84897.607, env_step=117760000, len=512, n/ep=60, n/st=25600, rew=-7181.18]                             


Epoch #920: test_reward: -9158.250000 ± 14856.157235, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #921: 128001it [04:15, 500.62it/s, agent0/loss=85281.023, env_step=117888000, len=512, n/ep=40, n/st=25600, rew=-7767.20]                            


Epoch #921: test_reward: -7497.150000 ± 13892.225964, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #922: 128001it [04:15, 501.83it/s, agent0/loss=85582.216, env_step=118016000, len=512, n/ep=60, n/st=25600, rew=-8342.95]                            


Epoch #922: test_reward: -9155.025000 ± 14858.137867, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #923: 128001it [04:15, 501.10it/s, agent0/loss=85680.954, env_step=118144000, len=512, n/ep=40, n/st=25600, rew=-9744.81]                             


Epoch #923: test_reward: -9993.425000 ± 15244.657067, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #924: 128001it [04:15, 501.09it/s, agent0/loss=85757.772, env_step=118272000, len=512, n/ep=60, n/st=25600, rew=-7315.64]                            


Epoch #924: test_reward: -8330.825000 ± 14404.430738, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #925: 128001it [04:15, 501.92it/s, agent0/loss=85742.602, env_step=118400000, len=512, n/ep=40, n/st=25600, rew=-8014.74]                            


Epoch #925: test_reward: -9988.775000 ± 15247.694843, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #926: 128001it [04:14, 502.21it/s, agent0/loss=85632.962, env_step=118528000, len=512, n/ep=60, n/st=25600, rew=-6735.78]                            


Epoch #926: test_reward: -12483.050000 ± 16109.253519, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #927: 128001it [04:15, 500.68it/s, agent0/loss=85678.684, env_step=118656000, len=512, n/ep=40, n/st=25600, rew=-8178.91]                             


Epoch #927: test_reward: -8327.400000 ± 14406.403100, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #928: 128001it [04:15, 500.42it/s, agent0/loss=85798.251, env_step=118784000, len=512, n/ep=60, n/st=25600, rew=-9444.14]                            


Epoch #928: test_reward: -10822.175000 ± 15583.240690, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #929: 128001it [04:15, 501.80it/s, agent0/loss=85851.597, env_step=118912000, len=512, n/ep=40, n/st=25600, rew=-6609.79]                            


Epoch #929: test_reward: -8324.625000 ± 14408.001384, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #930: 128001it [04:15, 501.74it/s, agent0/loss=85862.010, env_step=119040000, len=512, n/ep=60, n/st=25600, rew=-6540.48]                             


Epoch #930: test_reward: -5001.375000 ± 11879.418946, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #931: 128001it [04:15, 500.81it/s, agent0/loss=85876.940, env_step=119168000, len=512, n/ep=40, n/st=25600, rew=-9631.01]                            


Epoch #931: test_reward: -6665.425000 ± 13307.305591, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #932: 128001it [04:15, 501.14it/s, agent0/loss=85842.949, env_step=119296000, len=512, n/ep=60, n/st=25600, rew=-9416.99]                             


Epoch #932: test_reward: -8323.000000 ± 14408.936513, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #933: 128001it [04:15, 501.33it/s, agent0/loss=85802.288, env_step=119424000, len=512, n/ep=40, n/st=25600, rew=-7191.68]                            


Epoch #933: test_reward: -10823.375000 ± 15582.409027, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #934: 128001it [04:15, 501.56it/s, agent0/loss=85856.213, env_step=119552000, len=512, n/ep=60, n/st=25600, rew=-8956.17]                            


Epoch #934: test_reward: -9157.925000 ± 14856.356282, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #935: 128001it [04:15, 500.91it/s, agent0/loss=85696.900, env_step=119680000, len=512, n/ep=40, n/st=25600, rew=-8205.80]                            


Epoch #935: test_reward: -9160.975000 ± 14854.482048, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #936: 128001it [04:14, 503.00it/s, agent0/loss=85622.377, env_step=119808000, len=512, n/ep=60, n/st=25600, rew=-9491.87]                            


Epoch #936: test_reward: -7494.050000 ± 13893.891505, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #937: 128001it [04:15, 501.35it/s, agent0/loss=85877.782, env_step=119936000, len=512, n/ep=40, n/st=25600, rew=-8975.64]                            


Epoch #937: test_reward: -9154.825000 ± 14858.260384, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #938: 128001it [04:15, 501.18it/s, agent0/loss=86035.266, env_step=120064000, len=512, n/ep=60, n/st=25600, rew=-8963.32]                            


Epoch #938: test_reward: -7492.350000 ± 13894.804235, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #939: 128001it [04:15, 501.32it/s, agent0/loss=86102.403, env_step=120192000, len=512, n/ep=40, n/st=25600, rew=-9756.73]                            


Epoch #939: test_reward: -7497.175000 ± 13892.212538, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #940: 128001it [04:15, 500.65it/s, agent0/loss=86162.416, env_step=120320000, len=512, n/ep=60, n/st=25600, rew=-7093.26]                            


Epoch #940: test_reward: -7497.475000 ± 13892.051943, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #941: 128001it [04:14, 502.01it/s, agent0/loss=86114.199, env_step=120448000, len=512, n/ep=40, n/st=25600, rew=-10446.67]                            


Epoch #941: test_reward: -10819.100000 ± 15585.369660, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #942: 128001it [04:14, 502.13it/s, agent0/loss=85957.469, env_step=120576000, len=512, n/ep=60, n/st=25600, rew=-8487.35]                            


Epoch #942: test_reward: -7496.800000 ± 13892.413599, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #943: 128001it [04:15, 500.04it/s, agent0/loss=85616.876, env_step=120704000, len=512, n/ep=40, n/st=25600, rew=-8635.74]                            


Epoch #943: test_reward: -9989.850000 ± 15246.992012, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #944: 128001it [04:15, 500.80it/s, agent0/loss=85653.497, env_step=120832000, len=512, n/ep=60, n/st=25600, rew=-9781.52]                            


Epoch #944: test_reward: -6668.350000 ± 13305.846532, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #945: 128001it [04:15, 500.89it/s, agent0/loss=85966.814, env_step=120960000, len=512, n/ep=40, n/st=25600, rew=-11159.15]                            


Epoch #945: test_reward: -8329.300000 ± 14405.309403, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #946: 128001it [04:14, 502.95it/s, agent0/loss=86108.345, env_step=121088000, len=512, n/ep=60, n/st=25600, rew=-8779.92]                            


Epoch #946: test_reward: -7494.200000 ± 13893.811249, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #947: 128001it [04:15, 500.41it/s, agent0/loss=86097.740, env_step=121216000, len=512, n/ep=40, n/st=25600, rew=-8986.86]                             


Epoch #947: test_reward: -8327.650000 ± 14406.259561, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #948: 128001it [04:15, 501.78it/s, agent0/loss=86085.424, env_step=121344000, len=512, n/ep=60, n/st=25600, rew=-9529.83]                            


Epoch #948: test_reward: -9159.625000 ± 14855.312083, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #949: 128001it [04:14, 502.14it/s, agent0/loss=86201.152, env_step=121472000, len=512, n/ep=40, n/st=25600, rew=-9261.24]                            


Epoch #949: test_reward: -5834.825000 ± 12640.340484, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #950: 128001it [04:15, 500.65it/s, agent0/loss=86245.644, env_step=121600000, len=512, n/ep=60, n/st=25600, rew=-10599.58]                            


Epoch #950: test_reward: -5002.600000 ± 11878.905646, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #951: 128001it [04:15, 501.84it/s, agent0/loss=86275.969, env_step=121728000, len=512, n/ep=40, n/st=25600, rew=-10877.56]                            


Epoch #951: test_reward: -7495.825000 ± 13892.938416, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #952: 128001it [04:16, 500.00it/s, agent0/loss=86360.236, env_step=121856000, len=512, n/ep=60, n/st=25600, rew=-7972.03]                            


Epoch #952: test_reward: -7497.350000 ± 13892.118842, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #953: 128001it [04:15, 501.59it/s, agent0/loss=86393.246, env_step=121984000, len=512, n/ep=40, n/st=25600, rew=-6337.95]                            


Epoch #953: test_reward: -9163.100000 ± 14853.176848, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #954: 128001it [04:14, 502.43it/s, agent0/loss=86071.906, env_step=122112000, len=512, n/ep=60, n/st=25600, rew=-8364.53]                            


Epoch #954: test_reward: -8329.175000 ± 14405.381185, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #955: 128001it [04:15, 501.82it/s, agent0/loss=85662.751, env_step=122240000, len=512, n/ep=40, n/st=25600, rew=-8574.60]                            


Epoch #955: test_reward: -9164.150000 ± 14852.530309, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #956: 128001it [04:15, 501.80it/s, agent0/loss=85667.573, env_step=122368000, len=512, n/ep=60, n/st=25600, rew=-8506.45]                            


Epoch #956: test_reward: -10822.325000 ± 15583.137098, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #957: 128001it [04:15, 500.17it/s, agent0/loss=86111.989, env_step=122496000, len=512, n/ep=40, n/st=25600, rew=-8709.62]                            


Epoch #957: test_reward: -10823.825000 ± 15582.098137, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #958: 128001it [04:15, 501.87it/s, agent0/loss=86458.198, env_step=122624000, len=512, n/ep=60, n/st=25600, rew=-6937.15]                            


Epoch #958: test_reward: -7500.550000 ± 13890.398777, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #959: 128001it [04:15, 501.88it/s, agent0/loss=86517.275, env_step=122752000, len=512, n/ep=40, n/st=25600, rew=-6744.48]                            


Epoch #959: test_reward: -11652.550000 ± 15870.217256, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #960: 128001it [04:15, 500.88it/s, agent0/loss=86429.471, env_step=122880000, len=512, n/ep=60, n/st=25600, rew=-7570.22]                             


Epoch #960: test_reward: -5005.675000 ± 11877.618085, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #961: 128001it [04:14, 502.68it/s, agent0/loss=86345.330, env_step=123008000, len=512, n/ep=40, n/st=25600, rew=-9360.94]                            


Epoch #961: test_reward: -6666.975000 ± 13306.532724, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #962: 128001it [04:15, 501.06it/s, agent0/loss=86443.746, env_step=123136000, len=512, n/ep=60, n/st=25600, rew=-8148.77]                            


Epoch #962: test_reward: -9155.200000 ± 14858.030750, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #963: 128001it [04:14, 502.14it/s, agent0/loss=86521.202, env_step=123264000, len=512, n/ep=40, n/st=25600, rew=-9838.29]                            


Epoch #963: test_reward: -7500.025000 ± 13890.679975, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #964: 128001it [04:15, 500.92it/s, agent0/loss=86577.168, env_step=123392000, len=512, n/ep=60, n/st=25600, rew=-9483.57]                            


Epoch #964: test_reward: -8327.575000 ± 14406.302598, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #965: 128001it [04:15, 500.77it/s, agent0/loss=86659.335, env_step=123520000, len=512, n/ep=40, n/st=25600, rew=-7548.62]                            


Epoch #965: test_reward: -10823.425000 ± 15582.374570, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #966: 128001it [04:15, 500.82it/s, agent0/loss=86714.008, env_step=123648000, len=512, n/ep=60, n/st=25600, rew=-8937.59]                            


Epoch #966: test_reward: -3338.425000 ± 9980.550273, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #967: 128001it [04:15, 500.19it/s, agent0/loss=86669.764, env_step=123776000, len=512, n/ep=40, n/st=25600, rew=-9617.76]                            


Epoch #967: test_reward: -8326.350000 ± 14407.008674, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #968: 128001it [04:15, 501.21it/s, agent0/loss=86591.915, env_step=123904000, len=512, n/ep=60, n/st=25600, rew=-8415.39]                            


Epoch #968: test_reward: -8324.725000 ± 14407.944036, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #969: 128001it [04:15, 501.04it/s, agent0/loss=86648.569, env_step=124032000, len=512, n/ep=40, n/st=25600, rew=-10022.80]                            


Epoch #969: test_reward: -7497.400000 ± 13892.092094, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #970: 128001it [04:15, 501.70it/s, agent0/loss=86735.397, env_step=124160000, len=512, n/ep=60, n/st=25600, rew=-9477.83]                            


Epoch #970: test_reward: -8324.600000 ± 14408.015744, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #971: 128001it [04:15, 501.93it/s, agent0/loss=86789.946, env_step=124288000, len=512, n/ep=40, n/st=25600, rew=-7958.66]                            


Epoch #971: test_reward: -8326.325000 ± 14407.023014, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #972: 128001it [04:15, 500.75it/s, agent0/loss=86828.270, env_step=124416000, len=512, n/ep=60, n/st=25600, rew=-8922.32]                            


Epoch #972: test_reward: -7495.625000 ± 13893.045447, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #973: 128001it [04:15, 500.97it/s, agent0/loss=86829.297, env_step=124544000, len=512, n/ep=40, n/st=25600, rew=-8376.51]                            


Epoch #973: test_reward: -9156.625000 ± 14857.155335, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #974: 128001it [04:15, 501.79it/s, agent0/loss=86870.225, env_step=124672000, len=512, n/ep=60, n/st=25600, rew=-8865.39]                            


Epoch #974: test_reward: -9155.175000 ± 14858.046047, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #975: 128001it [04:16, 499.93it/s, agent0/loss=86849.211, env_step=124800000, len=512, n/ep=40, n/st=25600, rew=-6609.93]                             


Epoch #975: test_reward: -9158.225000 ± 14856.172555, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #976: 128001it [04:13, 504.11it/s, agent0/loss=86836.696, env_step=124928000, len=512, n/ep=60, n/st=25600, rew=-7273.11]                            


Epoch #976: test_reward: -11651.125000 ± 15871.260872, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #977: 128001it [04:15, 501.52it/s, agent0/loss=86933.138, env_step=125056000, len=512, n/ep=40, n/st=25600, rew=-7138.05]                             


Epoch #977: test_reward: -5831.700000 ± 12641.774785, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #978: 128001it [04:15, 500.92it/s, agent0/loss=86987.537, env_step=125184000, len=512, n/ep=60, n/st=25600, rew=-8364.07]                            


Epoch #978: test_reward: -9991.700000 ± 15245.783867, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #979: 128001it [04:15, 501.84it/s, agent0/loss=86920.916, env_step=125312000, len=512, n/ep=40, n/st=25600, rew=-7131.82]                            


Epoch #979: test_reward: -9161.125000 ± 14854.390080, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #980: 128001it [04:15, 501.17it/s, agent0/loss=86759.201, env_step=125440000, len=512, n/ep=60, n/st=25600, rew=-8800.73]                            


Epoch #980: test_reward: -8324.425000 ± 14408.116228, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #981: 128001it [04:14, 502.28it/s, agent0/loss=86766.288, env_step=125568000, len=512, n/ep=40, n/st=25600, rew=-9912.73]                            


Epoch #981: test_reward: -6667.125000 ± 13306.458263, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #982: 128001it [04:15, 500.65it/s, agent0/loss=86969.004, env_step=125696000, len=512, n/ep=60, n/st=25600, rew=-9734.33]                            


Epoch #982: test_reward: -9987.000000 ± 15248.853459, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #983: 128001it [04:15, 500.71it/s, agent0/loss=87043.951, env_step=125824000, len=512, n/ep=40, n/st=25600, rew=-10014.21]                            


Epoch #983: test_reward: -9989.650000 ± 15247.122174, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #984: 128001it [04:16, 499.86it/s, agent0/loss=87116.331, env_step=125952000, len=512, n/ep=60, n/st=25600, rew=-7299.48]                            


Epoch #984: test_reward: -7497.125000 ± 13892.239424, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #985: 128001it [04:15, 500.38it/s, agent0/loss=87196.120, env_step=126080000, len=512, n/ep=40, n/st=25600, rew=-9187.36]                            


Epoch #985: test_reward: -8328.250000 ± 14405.912902, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #986: 128001it [04:14, 502.08it/s, agent0/loss=87164.699, env_step=126208000, len=512, n/ep=60, n/st=25600, rew=-9189.95]                            


Epoch #986: test_reward: -6667.075000 ± 13306.483079, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #987: 128001it [04:16, 499.99it/s, agent0/loss=87155.979, env_step=126336000, len=512, n/ep=40, n/st=25600, rew=-9603.42]                            


Epoch #987: test_reward: -4167.525000 ± 11003.499028, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #988: 128001it [04:14, 502.22it/s, agent0/loss=87187.712, env_step=126464000, len=512, n/ep=60, n/st=25600, rew=-8456.46]                             


Epoch #988: test_reward: -8327.550000 ± 14406.316934, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #989: 128001it [04:19, 493.60it/s, agent0/loss=87194.178, env_step=126592000, len=512, n/ep=40, n/st=25600, rew=-10454.99]                            


Epoch #989: test_reward: -8329.200000 ± 14405.366799, best_reward: -2506.900000 ± 8762.591611 in #647


Epoch #990:  60%|######    | 76800/128000 [35:36<23:44, 35.95it/s, agent0/loss=87215.248, env_step=126643200, len=512, n/ep=40, n/st=25600, rew=-10011.96] 


KeyboardInterrupt: 

In [None]:
# Evaluation cell: roll out the policy currently held in memory on a single
# vectorised environment and gather 10 episodes.
model = "MultiHead_LOTZ"
policyModel = "DQN"
# Checkpoint file name. It is only consumed by the disk-loading path sketched
# below, which is currently switched off.
name = 'policy_MultiHead_LOTZ_NOV01.pth'

# To evaluate a checkpoint from disk instead of the in-memory policy, rebuild
# the agents before binding policy_test and load the weights right after it:
#   policy, optim, _ = get_agents()
#   model_load_path = os.path.join("policy_LOTZ", name)
#   policy_test.load_state_dict(torch.load(model_load_path))

# policy_test is the object stored under 'agent0' inside `policy`, i.e. the
# same object the collector below reaches through `policy`.
policy_test = policy.policies['agent0']

# A vector env holding exactly one environment built by _get_env.
envs = DummyVectorEnv([_get_env])

# Epsilon of zero; presumably this switches epsilon-greedy exploration off --
# confirm against the policy class. policy_test.eval() stays disabled here.
policy_test.set_eps(0.0)

# The collector is driven by the whole `policy` object, not by policy_test.
# Alternatives tried earlier, kept for reference:
#   CustomCollector(policy.policies['agent0'], envs, exploration_noise=True)
#   CustomCollector(policy_test, envs, exploration_noise=False)
collector = CustomCollector(policy, envs, exploration_noise=False)

# Other collect() variants tried earlier: n_episode=1, render=0.01,
# gym_reset_kwargs={'seed': 2}.
results = collector.collect(n_episode=10)
results

In [None]:
# Summarise the episode returns gathered by the evaluation cell: print their
# mean and plot their distribution.
import matplotlib.pyplot as plt

# Only episodes whose return is above -10 are summarised; the selection is
# computed once and shared by the mean and the histogram.
# NOTE(review): presumably returns of -10 or less mark failed/penalised
# episodes -- confirm against the environment's reward definition.
eval_rews = np.asarray(results['rews'])
kept_rews = eval_rews[eval_rews > -10]

if kept_rews.size:
    print(np.mean(kept_rews))

    # Histogram of the retained returns.
    plt.hist(kept_rews, bins=100)
    plt.show()
else:
    # np.mean of an empty selection is nan (with a RuntimeWarning) and the
    # histogram would be blank, so report the situation explicitly instead.
    print(f"No episode out of {eval_rews.size} has a return above -10; nothing to summarise.")
