In [1]:
import os
import datetime
from typing import Optional, Tuple
import json


os.environ["WANDB_NOTEBOOK_NAME"] = "Tianshow_Centralized_Training"

import numpy as np
import torch
from tianshou.data import Collector, VectorReplayBuffer, PrioritizedVectorReplayBuffer
from tianshou.env import DummyVectorEnv
from tianshou.env.pettingzoo_env import PettingZooEnv
from tianshou.policy import BasePolicy, DQNPolicy, MultiAgentPolicyManager, RandomPolicy, RainbowPolicy
from tianshou.trainer import OffpolicyTrainer
from torch.utils.tensorboard import SummaryWriter

from pettingzoo.sisl import pursuit_v4
from tianshou.env.pettingzoo_env_parallel import PettingZooParallelEnv

from TaskAllocation.RL_Policies.MultiHead_SISL import MultiHead_SISL
from TaskAllocation.RL_Policies.DNN_SISL import DNN_SISL
from TaskAllocation.RL_Policies.CNN_SISL import CNN_SISL
from TaskAllocation.RL_Policies.CNN_ATT_SISL import CNN_ATT_SISL
from TaskAllocation.RL_Policies.SISL_Task_MultiHead import SISL_Task_MultiHead


from Mods.MemoryBuffer import StateMemoryVectorReplayBuffer
from Mods.MemoryBuffer import MemoryOffpolicyTrainer
import Mods.MemPursuitEnv as MemPursuitEnv

import Mods.TaskPursuitEnv as TaskPursuitEnv

from TaskAllocation.RL_Policies.Custom_Classes import CustomNet
from TaskAllocation.RL_Policies.Custom_Classes import CustomCollector
from TaskAllocation.RL_Policies.Custom_Classes import CustomParallelToAECWrapper

#import Mods.TaskPursuitEnv as TaskPursuitEnv
import Mods.ActionLoggerWrapper as ActionLoggerWrapper
import Mods.VDNPolicy as VDNPolicy
import Mods.PettingZooParallelEnv2 as PettingZooParallelEnv2
import Mods.CollectorMA as CollectorMA

# Add specific modification to tianshou
import wandb
from tianshou.utils import WandbLogger
from tianshou.utils.logger.base import LOG_DATA_TYPE

def new_write(self, step_type: str, step: int, data: LOG_DATA_TYPE) -> None:
    """Send one batch of metrics directly to Weights & Biases.

    Installed below as the replacement for ``WandbLogger.write``.

    Args:
        step_type: Name of the step counter; it is added to the logged record
            as a key so the counter value travels with the metrics.
        step: Current value of that counter.
        data: Mapping of metric name to value. It is left unmodified.
    """
    # Log a merged copy so the caller's dict is not mutated as a side effect.
    wandb.log({**data, step_type: step})


WandbLogger.write = new_write

#from tianshou_DQN import train
# ---- Run identity ----
# Network architecture to train. Other names recognised by _get_agents():
# "MultiHead_SISL", "SISL_Task_MultiHead", "DNN_SISL", "CNN_ATT_SISL".
model = "CNN_SISL"
test_num = "_Desk_01"  # suffix appended to the model name to label this run
policyModel = "DQN"  # _get_agents() handles "DQN" and "VDN"

# Number of environments in the training / testing vector envs.
train_env_num, test_env_num = 10, 10

name = f"{model}{test_num}"

# ---- Logging ----
# A timestamp is appended to the run name to form the log name.
now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
log_name = f"{name}{now}"
log_path = os.path.join('./', "Logs", "dqn_sisl", log_name)

# ---- Checkpoints ----
policy_path = "dqn_SISL"
load_policy_name = 'policy_SISL_Task_MultiHead_Desk_NewExpFix240109-013813_53_BestRew.pth'  # Best SISL Task, 128emb
# Alternative checkpoint (best SISL CNN):
# load_policy_name = 'policy_CNN_SISL_SISL_NOV12_Emb128231120-134122_5079.pth'
save_policy_name = f"policy_{log_name}"

# ---- Policy options (read by _get_agents and the training callbacks) ----
Policy_Config = {
    "same_policy": True,   # True: one policy object is shared by every agent
    "load_model": False,   # True: load weights from model_load_path before training
    "freeze_CNN": False,   # True: freeze conv layers, optimise only policy_fn / value_fn
}

# ---- Keyword arguments forwarded to pursuit_v4.parallel_env ----
SISL_Config = {
    "max_cycles": 500,         # default: 500
    "x_size": 16,              # default: 16
    "y_size": 16,              # default: 16
    "shared_reward": False,    # default: True
    "n_evaders": 30,           # default: 30
    "n_pursuers": 8,           # default: 10
    "obs_range": 7,            # default: 7
    "n_catch": 2,              # default: 2
    "freeze_evaders": False,   # default: False
    "tag_reward": 0.01,        # default: 0.01
    "catch_reward": 5.0,       # default: 5.0
    "urgency_reward": -0.1,    # default: -0.1
    "surround": True,          # default: True
    "constraint_window": 1.0,  # default: 1.0
    ###---- Additional Config ----###
    # "att_memory" : False,
    # "max_tasks" : 10
}

max_cycles = SISL_Config["max_cycles"]
n_agents = SISL_Config["n_pursuers"]

# Step counts and frequencies use integer division so they stay ``int``;
# true division would turn them into floats (e.g. 125.0).
# NOTE(review): the "/ 8" scaling presumably matches n_pursuers == 8 - confirm.
dqn_params = {
    "discount_factor": 0.98,
    "estimation_step": 5,
    "target_update_freq": 1000 // 8,  # alternative: max_cycles * n_agents
    # Key spelling kept as-is; it is not read by the code visible in this
    # file and is only recorded in the run config.
    "optminizer": "Adam",
    "lr": 0.00075,
}

trainer_params = {
    "max_epoch": 500,
    "step_per_epoch": 80000 // 8,    # alternative: 5 * (150 * n_agents)
    "step_per_collect": 400 // 8,    # alternative: * (10 * n_agents)
    "episode_per_test": 20,
    "batch_size": 32,
    # Updates only run once a collect has finished, repeated as many times
    # as needed to meet this updates-per-step ratio.
    "update_per_step": 1 / 20,
    "tn_eps_max": 0.1,    # starting exploration epsilon during training
    "ts_eps_max": 0.01,   # exploration epsilon used while testing
    "warmup_size": 1,
}


# Flat snapshot of every setting; it is handed to the wandb logger as the run
# config and written to TensorBoard as text. It is built as a NEW dict:
# binding runConfig to dqn_params and updating in place would leak the
# policy / trainer / environment keys into dqn_params itself.
# Later dicts win on duplicate keys (same precedence as successive updates).
runConfig = {**dqn_params, **Policy_Config, **trainer_params, **SISL_Config}

model_load_path = os.path.join(policy_path, load_policy_name)
model_save_path = os.path.join(policy_path, save_policy_name)

# Make sure the checkpoint and log directories exist before training starts.
os.makedirs(policy_path, exist_ok=True)
os.makedirs(log_path, exist_ok=True)

def _build_net(model_name, observation_space, device):
    """Instantiate the Q-network selected by ``model_name`` and move it to ``device``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported architectures.
    """
    if model_name == "MultiHead_SISL":
        net = MultiHead_SISL(
            obs_shape=observation_space,
            num_tasks=5,
            hidden_sizes=32,
            device=device,
        )
    elif model_name == "SISL_Task_MultiHead":
        net = SISL_Task_MultiHead(
            num_tasks=20,
            num_features_per_task=14,
            device=device,
        )
    elif model_name == "DNN_SISL":
        net = DNN_SISL(
            obs_shape=observation_space,
            action_shape=5,
            device=device,
        )
    elif model_name == "CNN_SISL":
        net = CNN_SISL(
            obs_shape=observation_space.shape,
            action_shape=5,
            device=device,
        )
    elif model_name == "CNN_ATT_SISL":
        net = CNN_ATT_SISL(
            obs_shape=observation_space.shape,
            action_shape=5,
            device=device,
        )
    else:
        raise ValueError(
            f"Unknown model {model_name!r}; expected one of 'MultiHead_SISL', "
            "'SISL_Task_MultiHead', 'DNN_SISL', 'CNN_SISL', 'CNN_ATT_SISL'"
        )
    return net.to(device)


def _get_agents(
    agent_learn: Optional[BasePolicy] = None,
    agent_opponent: Optional[BasePolicy] = None,
    optim: Optional[torch.optim.Optimizer] = None,
    policy_load_path = None,
) -> Tuple[BasePolicy, torch.optim.Optimizer, list]:
    """Build the per-agent DQN policies and wrap them in a ``VDNMAPolicy``.

    The network architecture is chosen by the module-level ``model`` and the
    policy flavour by ``policyModel`` ("DQN" or "VDN"; they differ only in
    ``clip_loss_grad``). Sharing, checkpoint loading and conv-layer freezing
    are controlled by ``Policy_Config``.

    Args:
        agent_learn: Ignored; a fresh policy is always built. Kept for
            interface compatibility.
        agent_opponent: Unused. Kept for interface compatibility.
        optim: Ignored; a fresh Adam optimizer is always built.
        policy_load_path: Checkpoint to load when
            ``Policy_Config["load_model"]`` is true. Defaults to the
            module-level ``model_load_path``.

    Returns:
        ``(policy, optim, agent_ids)``: the multi-agent policy, the optimizer
        of the last network that was built, and ``env.agents``.

    Raises:
        ValueError: If ``model`` or ``policyModel`` holds an unsupported value.
    """
    if policyModel not in ("DQN", "VDN"):
        raise ValueError(
            f"Unknown policyModel {policyModel!r}; expected 'DQN' or 'VDN'"
        )

    env = _get_env()
    agent_observation_space = env.observation_space
    action_shape = env.action_space

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # A single shared policy, or a fixed number of distinct ones
    # (alternative noted by the author: len(env.agents)).
    policies_number = 1 if Policy_Config["same_policy"] else 4

    agents = []
    for _ in range(policies_number):
        net = _build_net(model, agent_observation_space, device)

        if Policy_Config["freeze_CNN"]:
            net.freeze_conv_layers()  # Freeze the convolutional layers
            # Only the heads are handed to the optimizer.
            optim = torch.optim.Adam(
                list(net.policy_fn.parameters()) + list(net.value_fn.parameters()),
                lr=dqn_params["lr"],
            )
        else:
            optim = torch.optim.Adam(
                net.parameters(),
                lr=dqn_params["lr"],
                weight_decay=0.0,
                amsgrad=True,
            )

        agent_learn = DQNPolicy(
            model=net,
            optim=optim,
            action_space=action_shape,
            discount_factor=dqn_params["discount_factor"],
            estimation_step=dqn_params["estimation_step"],
            target_update_freq=dqn_params["target_update_freq"],
            reward_normalization=False,
            # The "VDN" flavour differs from "DQN" only by this flag.
            clip_loss_grad=(policyModel == "VDN"),
        )

        if Policy_Config["load_model"] is True:
            # Load the saved checkpoint. map_location lets a checkpoint saved
            # on a GPU be restored on a CPU-only machine.
            checkpoint_path = (
                model_load_path if policy_load_path is None else policy_load_path
            )
            agent_learn.load_state_dict(
                torch.load(checkpoint_path, map_location=device)
            )
            print(f'Loaded-> {checkpoint_path}')

        agents.append(agent_learn)

    if Policy_Config["same_policy"]:
        # Every agent slot references the very same policy object.
        agents = [agents[0]] * len(env.agents)
    else:
        # Agents beyond the distinct policies reuse the first policy.
        agents.extend([agents[0]] * (len(env.agents) - policies_number))

    # Both flavours are wrapped in VDNMAPolicy (the plain alternative would be
    # MultiAgentPolicyManager(policies=agents, env=env)).
    policy = VDNPolicy.VDNMAPolicy(policies=agents, env=env, device=device)

    return policy, optim, env.agents

def _get_env(test=False):
    """Create one pursuit environment wrapped as a ``PettingZooParallelEnv``.

    Used as the environment factory handed to ``DummyVectorEnv``.

    ``test`` is accepted but currently makes no difference: the training and
    the testing environment are built from exactly the same ``SISL_Config``
    settings.
    """
    forwarded_keys = (
        "max_cycles",
        "x_size",
        "y_size",
        "shared_reward",
        "n_evaders",
        "n_pursuers",
        "obs_range",
        "n_catch",
        "freeze_evaders",
        "tag_reward",
        "catch_reward",
        "urgency_reward",
        "surround",
        "constraint_window",
        # "att_memory",
    )
    pursuit_kwargs = {key: SISL_Config[key] for key in forwarded_keys}

    # render_mode=None; "human" is the commented-out alternative.
    # TaskPursuitEnv.env(...) is the commented-out alternative constructor.
    parallel_env = pursuit_v4.parallel_env(render_mode=None, **pursuit_kwargs)

    return PettingZooParallelEnv(parallel_env)
    
    #return PettingZooEnv(env)   

# print(json.dumps(runConfig, indent=4))


In [2]:
if __name__ == "__main__":
    # Training entry point: builds the vectorised environments, the agents,
    # per-agent replay buffers and collectors, the logger and the callbacks,
    # then runs Tianshou's off-policy trainer.

    torch.set_grad_enabled(True)

    # ======== Step 1: Environment setup =========
    train_envs = DummyVectorEnv([_get_env for _ in range(train_env_num)])
    test_envs = DummyVectorEnv([_get_env for _ in range(test_env_num)])

    # Seed NumPy, PyTorch and both environment pools.
    seed = 0
    np.random.seed(seed)
    torch.manual_seed(seed)
    train_envs.seed(seed)
    test_envs.seed(seed)

    # ======== Step 2: Agent setup =========
    policy, optim, agents = _get_agents()

    # One prioritized replay buffer per agent, for each collector.
    agents_buffers_training = {
        agent: PrioritizedVectorReplayBuffer(
            300_000, len(train_envs), alpha=0.6, beta=0.4
        )
        for agent in agents
    }
    # Sized by the test pool so the buffer count follows test_env_num even
    # when it differs from train_env_num.
    agents_buffers_test = {
        agent: PrioritizedVectorReplayBuffer(
            300_000, len(test_envs), alpha=0.6, beta=0.4
        )
        for agent in agents
    }

    # ======== Step 3: Collector setup =========
    # Disabled alternative: tianshou's plain Collector with a single
    # PrioritizedVectorReplayBuffer(300_000, len(train_envs), alpha=0.6, beta=0.4)
    # for training and a buffer-less Collector for testing.
    train_collector = CollectorMA.CollectorMA(
        policy,
        train_envs,
        agents_buffers_training,
        exploration_noise=True,
    )
    test_collector = CollectorMA.CollectorMA(
        policy,
        test_envs,
        agents_buffers_test,
        exploration_noise=True,
    )

    print("Buffer Warming Up ")
    # The warm-up collection itself is currently disabled. It used to be:
    # for i in range(trainer_params["warmup_size"]):
    #     train_collector.collect(n_episode=train_env_num)
    #     print(".", end="")

    # Not used further in this cell; kept as a module-level name.
    info = { "Buffer"  : "PriorizedReplayBuffer", " Warmup_ep" : runConfig["warmup_size"]}

    # ======== Logging setup (wandb + tensorboard) =========
    logger = WandbLogger(
        train_interval = runConfig["max_cycles"] * runConfig["n_pursuers"] ,
        test_interval = 1,#runConfig["max_cycles"] * runConfig["n_pursuers"],
        update_interval = runConfig["max_cycles"],
        save_interval = 1,
        write_flush = True,
        project = "SISL_Eval01",
        name = log_name,
        entity = None,
        run_id = log_name,
        config = runConfig,
        monitor_gym = True )

    writer = SummaryWriter(log_path)
    writer.add_text("args", str(runConfig))
    logger.load(writer)

    # Single-element list so the nested callbacks can update the counter in
    # place. It is incremented on every reward_metric call.
    global_step_holder = [0]

    def _save_policy_checkpoint(ma_policy, agent_id, tail):
        """Save one agent's weights as ``<model_save_path>_<counter><tail>``."""
        torch.save(
            ma_policy.policies[agent_id].state_dict(),
            f"{model_save_path}_{global_step_holder[0]}{tail}",
        )

    def _set_eps(epsilon):
        """Apply ``epsilon`` to the shared policy, or to every agent's policy."""
        targets = [agents[0]] if Policy_Config["same_policy"] else agents
        for agent in targets:
            policy.policies[agent].set_eps(epsilon)

    # ======== Step 4: Callback functions setup =========
    def save_best_fn(policy):
        """Trainer's ``save_best_fn`` callback: checkpoint the given policy."""
        if Policy_Config["same_policy"]:
            _save_policy_checkpoint(policy, agents[0], "_BestRew.pth")
            print("Best Saved Rew" , str(global_step_holder[0]))
        else:
            for agent in agents:
                _save_policy_checkpoint(policy, agent, f"_{agent}.pth")
            print("Bests Saved Rew" , str(global_step_holder[0]))

    def save_test_best_fn(policy):
        """Checkpoint for best episode length (not passed to the trainer below)."""
        if Policy_Config["same_policy"]:
            _save_policy_checkpoint(policy, agents[0], "_BestLen.pth")
            print("Best Saved Length" , str(global_step_holder[0]))
        else:
            for agent in agents:
                _save_policy_checkpoint(policy, agent, f"_{agent}.pth")
            print("Best Saved Length" , str(global_step_holder[0]))

    def stop_fn(mean_rewards):
        """Stop only at a practically unreachable reward threshold."""
        return mean_rewards >= 99999939.0

    def train_fn(epoch, env_step):
        """Linearly decay the training epsilon from eps_max towards eps_max/100."""
        eps_max = trainer_params['tn_eps_max']
        epsilon = eps_max - (eps_max - eps_max/100)*(epoch/trainer_params['max_epoch'])
        _set_eps(epsilon)

    def test_fn(epoch, env_step):
        """Switch to the test epsilon and periodically checkpoint the policy."""
        _set_eps(trainer_params['ts_eps_max'])

        # Periodic checkpoint whenever the reward_metric call counter is a
        # multiple of 10.
        if global_step_holder[0] % 10 == 0:
            if Policy_Config["same_policy"]:
                _save_policy_checkpoint(policy, agents[0], "_Step.pth")
            else:
                for agent in agents:
                    _save_policy_checkpoint(
                        policy, agent, f"_{agent}Step{global_step_holder[0]}.pth"
                    )
            print("Steps Policy Saved " , str(global_step_holder[0]))

    def reward_metric(rews):
        """Return rewards unchanged; used as a hook to count invocations."""
        global_step_holder[0] +=1
        return rews

    # ======== Step 5: Run the trainer =========
    offPolicyTrainer = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=trainer_params['max_epoch'],
        step_per_epoch=trainer_params['step_per_epoch'],
        step_per_collect=trainer_params['step_per_collect'],
        episode_per_test= trainer_params['episode_per_test'],
        batch_size=trainer_params['batch_size'],
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        # save_test_best_fn=save_test_best_fn,
        update_per_step=trainer_params['update_per_step'],
        logger=logger,
        test_in_train=True,
        reward_metric=reward_metric,
        show_progress = True
        )

    try:
        result = offPolicyTrainer.run()
    finally:
        # Close the TensorBoard writer even if training is interrupted.
        writer.close()

    # return result, policy.policies[agents[1]]
    print(f"\n==========Result==========\n{result}")
    print("\n(the trained policy can be accessed via policy.policies[agents[0]])")



Buffer Warming Up 


[34m[1mwandb[0m: Currently logged in as: [33mandrekuros[0m. Use [1m`wandb login --relogin`[0m to force relogin
  from IPython.core.display import HTML, display  # type: ignore


Steps Policy Saved  0
Best Saved Rew 1


Epoch #1: 10001it [02:36, 63.97it/s, env_step=10000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.145, pursuer_1/loss=1.104, pursuer_2/loss=1.179, pursuer_3/loss=1.192, pursuer_4/loss=1.238, pursuer_5/loss=1.382, pursuer_6/loss=1.179, pursuer_7/loss=1.135, rew=-7.60]                             


Best Saved Rew 4
Epoch #1: test_reward: -12.655563 ± 10.861654, best_reward: -12.655563 ± 10.861654 in #1


Epoch #2: 10001it [03:04, 54.10it/s, env_step=20000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.260, pursuer_1/loss=1.365, pursuer_2/loss=1.336, pursuer_3/loss=1.360, pursuer_4/loss=1.424, pursuer_5/loss=1.328, pursuer_6/loss=1.345, pursuer_7/loss=1.289, rew=-3.36]                             


Best Saved Rew 7
Epoch #2: test_reward: -6.029500 ± 18.101603, best_reward: -6.029500 ± 18.101603 in #2


Epoch #3: 10001it [03:00, 55.34it/s, env_step=30000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.437, pursuer_1/loss=1.534, pursuer_2/loss=1.509, pursuer_3/loss=1.474, pursuer_4/loss=1.367, pursuer_5/loss=1.380, pursuer_6/loss=1.342, pursuer_7/loss=1.324, rew=-3.70]                             


Epoch #3: test_reward: -27.514563 ± 9.952611, best_reward: -6.029500 ± 18.101603 in #2


Epoch #4: 10001it [03:00, 55.30it/s, env_step=40000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.523, pursuer_1/loss=1.568, pursuer_2/loss=1.561, pursuer_3/loss=1.555, pursuer_4/loss=1.530, pursuer_5/loss=1.570, pursuer_6/loss=1.413, pursuer_7/loss=1.564, rew=16.08]                             


Epoch #4: test_reward: -24.124313 ± 12.486364, best_reward: -6.029500 ± 18.101603 in #2


Epoch #5: 10001it [03:00, 55.37it/s, env_step=50000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.506, pursuer_1/loss=1.540, pursuer_2/loss=1.302, pursuer_3/loss=1.419, pursuer_4/loss=1.339, pursuer_5/loss=1.321, pursuer_6/loss=1.432, pursuer_7/loss=1.393, rew=14.11]                             


Epoch #5: test_reward: -12.552625 ± 18.633348, best_reward: -6.029500 ± 18.101603 in #2


Epoch #6: 10001it [02:10, 76.84it/s, env_step=60000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.602, pursuer_1/loss=1.589, pursuer_2/loss=1.574, pursuer_3/loss=1.516, pursuer_4/loss=1.430, pursuer_5/loss=1.517, pursuer_6/loss=1.438, pursuer_7/loss=1.617, rew=17.66]                             


Epoch #6: test_reward: -7.111875 ± 14.811083, best_reward: -6.029500 ± 18.101603 in #2


Epoch #7: 10001it [02:11, 75.83it/s, env_step=70000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.408, pursuer_1/loss=1.483, pursuer_2/loss=1.559, pursuer_3/loss=1.536, pursuer_4/loss=1.475, pursuer_5/loss=1.520, pursuer_6/loss=1.562, pursuer_7/loss=1.330, rew=3.73]                             


Epoch #7: test_reward: -17.475813 ± 15.951347, best_reward: -6.029500 ± 18.101603 in #2


Epoch #8: 10001it [02:10, 76.41it/s, env_step=80000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.359, pursuer_1/loss=1.447, pursuer_2/loss=1.485, pursuer_3/loss=1.608, pursuer_4/loss=1.390, pursuer_5/loss=1.398, pursuer_6/loss=1.385, pursuer_7/loss=1.328, rew=10.28]                             


Epoch #8: test_reward: -18.115875 ± 14.057153, best_reward: -6.029500 ± 18.101603 in #2


Epoch #9: 10001it [02:11, 76.30it/s, env_step=90000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.505, pursuer_1/loss=1.503, pursuer_2/loss=1.583, pursuer_3/loss=1.752, pursuer_4/loss=1.502, pursuer_5/loss=1.682, pursuer_6/loss=1.612, pursuer_7/loss=1.668, rew=1.73]                             


Epoch #9: test_reward: -23.901438 ± 12.469951, best_reward: -6.029500 ± 18.101603 in #2


Epoch #10: 10001it [02:11, 75.90it/s, env_step=100000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.481, pursuer_1/loss=1.550, pursuer_2/loss=1.436, pursuer_3/loss=1.402, pursuer_4/loss=1.282, pursuer_5/loss=1.512, pursuer_6/loss=1.419, pursuer_7/loss=1.418, rew=8.54]                             


Steps Policy Saved  30
Epoch #10: test_reward: -6.870937 ± 15.195223, best_reward: -6.029500 ± 18.101603 in #2


Epoch #11: 10001it [02:11, 75.95it/s, env_step=110000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.502, pursuer_1/loss=1.551, pursuer_2/loss=1.565, pursuer_3/loss=1.525, pursuer_4/loss=1.442, pursuer_5/loss=1.560, pursuer_6/loss=1.555, pursuer_7/loss=1.616, rew=6.04]                             


Epoch #11: test_reward: -6.815875 ± 15.326038, best_reward: -6.029500 ± 18.101603 in #2


Epoch #12: 10001it [02:11, 76.05it/s, env_step=120000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.486, pursuer_1/loss=1.471, pursuer_2/loss=1.575, pursuer_3/loss=1.510, pursuer_4/loss=1.457, pursuer_5/loss=1.445, pursuer_6/loss=1.569, pursuer_7/loss=1.416, rew=0.83]                             


Best Saved Rew 37
Epoch #12: test_reward: -1.080250 ± 17.313078, best_reward: -1.080250 ± 17.313078 in #12


Epoch #13: 10001it [02:10, 76.50it/s, env_step=130000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.542, pursuer_1/loss=1.552, pursuer_2/loss=1.552, pursuer_3/loss=1.504, pursuer_4/loss=1.480, pursuer_5/loss=1.469, pursuer_6/loss=1.506, pursuer_7/loss=1.404, rew=16.99]                             


Epoch #13: test_reward: -1.596125 ± 12.380211, best_reward: -1.080250 ± 17.313078 in #12


Epoch #14: 10001it [02:10, 76.71it/s, env_step=140000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.488, pursuer_1/loss=1.525, pursuer_2/loss=1.536, pursuer_3/loss=1.496, pursuer_4/loss=1.532, pursuer_5/loss=1.404, pursuer_6/loss=1.500, pursuer_7/loss=1.569, rew=18.02]                             


Epoch #14: test_reward: -17.696375 ± 14.808459, best_reward: -1.080250 ± 17.313078 in #12


Epoch #15: 10001it [02:11, 76.27it/s, env_step=150000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.507, pursuer_1/loss=1.463, pursuer_2/loss=1.534, pursuer_3/loss=1.450, pursuer_4/loss=1.563, pursuer_5/loss=1.489, pursuer_6/loss=1.595, pursuer_7/loss=1.590, rew=6.38]                             


Epoch #15: test_reward: -6.121813 ± 14.619880, best_reward: -1.080250 ± 17.313078 in #12


Epoch #16: 10001it [02:11, 76.34it/s, env_step=160000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.550, pursuer_1/loss=1.596, pursuer_2/loss=1.518, pursuer_3/loss=1.462, pursuer_4/loss=1.406, pursuer_5/loss=1.435, pursuer_6/loss=1.412, pursuer_7/loss=1.410, rew=10.41]                             


Epoch #16: test_reward: -4.274688 ± 14.331790, best_reward: -1.080250 ± 17.313078 in #12


Epoch #17: 10001it [02:08, 77.62it/s, env_step=170000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.531, pursuer_1/loss=1.625, pursuer_2/loss=1.692, pursuer_3/loss=1.615, pursuer_4/loss=1.616, pursuer_5/loss=1.643, pursuer_6/loss=1.562, pursuer_7/loss=1.443, rew=4.60]                             


Epoch #17: test_reward: -15.759625 ± 13.498058, best_reward: -1.080250 ± 17.313078 in #12


Epoch #18: 10001it [02:10, 76.38it/s, env_step=180000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.519, pursuer_1/loss=1.509, pursuer_2/loss=1.499, pursuer_3/loss=1.419, pursuer_4/loss=1.542, pursuer_5/loss=1.595, pursuer_6/loss=1.601, pursuer_7/loss=1.516, rew=16.23]                             


Epoch #18: test_reward: -14.064688 ± 12.436685, best_reward: -1.080250 ± 17.313078 in #12


Epoch #19: 10001it [02:10, 76.65it/s, env_step=190000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.688, pursuer_1/loss=1.618, pursuer_2/loss=1.660, pursuer_3/loss=1.620, pursuer_4/loss=1.718, pursuer_5/loss=1.713, pursuer_6/loss=1.663, pursuer_7/loss=1.569, rew=8.37]                             


Epoch #19: test_reward: -7.802375 ± 18.696922, best_reward: -1.080250 ± 17.313078 in #12


Epoch #20: 10001it [02:11, 76.11it/s, env_step=200000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.614, pursuer_1/loss=1.579, pursuer_2/loss=1.505, pursuer_3/loss=1.521, pursuer_4/loss=1.526, pursuer_5/loss=1.536, pursuer_6/loss=1.489, pursuer_7/loss=1.635, rew=7.20]                             


Steps Policy Saved  60
Epoch #20: test_reward: -7.848750 ± 11.259977, best_reward: -1.080250 ± 17.313078 in #12


Epoch #21: 10001it [02:10, 76.54it/s, env_step=210000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.588, pursuer_1/loss=1.592, pursuer_2/loss=1.503, pursuer_3/loss=1.605, pursuer_4/loss=1.565, pursuer_5/loss=1.500, pursuer_6/loss=1.575, pursuer_7/loss=1.489, rew=5.15]                             


Epoch #21: test_reward: -11.712500 ± 13.721544, best_reward: -1.080250 ± 17.313078 in #12


Epoch #22: 10001it [02:08, 77.71it/s, env_step=220000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.554, pursuer_1/loss=1.577, pursuer_2/loss=1.639, pursuer_3/loss=1.526, pursuer_4/loss=1.528, pursuer_5/loss=1.532, pursuer_6/loss=1.605, pursuer_7/loss=1.518, rew=8.59]                             


Best Saved Rew 67
Epoch #22: test_reward: 6.750750 ± 13.341351, best_reward: 6.750750 ± 13.341351 in #22


Epoch #23: 10001it [02:10, 76.55it/s, env_step=230000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.540, pursuer_1/loss=1.621, pursuer_2/loss=1.545, pursuer_3/loss=1.703, pursuer_4/loss=1.578, pursuer_5/loss=1.498, pursuer_6/loss=1.608, pursuer_7/loss=1.543, rew=4.42]                             


Epoch #23: test_reward: -6.490687 ± 17.455895, best_reward: 6.750750 ± 13.341351 in #22


Epoch #24: 10001it [02:10, 76.81it/s, env_step=240000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.492, pursuer_1/loss=1.442, pursuer_2/loss=1.650, pursuer_3/loss=1.429, pursuer_4/loss=1.516, pursuer_5/loss=1.504, pursuer_6/loss=1.576, pursuer_7/loss=1.567, rew=8.89]                             


Epoch #24: test_reward: -18.045188 ± 14.299708, best_reward: 6.750750 ± 13.341351 in #22


Epoch #25: 10001it [02:12, 75.25it/s, env_step=250000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.626, pursuer_1/loss=1.629, pursuer_2/loss=1.821, pursuer_3/loss=1.595, pursuer_4/loss=1.608, pursuer_5/loss=1.621, pursuer_6/loss=1.621, pursuer_7/loss=1.664, rew=7.85]                             


Epoch #25: test_reward: -12.605813 ± 21.684519, best_reward: 6.750750 ± 13.341351 in #22


Epoch #26: 10001it [02:12, 75.72it/s, env_step=260000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.497, pursuer_1/loss=1.519, pursuer_2/loss=1.586, pursuer_3/loss=1.609, pursuer_4/loss=1.513, pursuer_5/loss=1.507, pursuer_6/loss=1.568, pursuer_7/loss=1.603, rew=10.35]                             


Epoch #26: test_reward: -15.418000 ± 16.962723, best_reward: 6.750750 ± 13.341351 in #22


Epoch #27: 10001it [02:10, 76.90it/s, env_step=270000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.662, pursuer_1/loss=1.552, pursuer_2/loss=1.609, pursuer_3/loss=1.628, pursuer_4/loss=1.615, pursuer_5/loss=1.401, pursuer_6/loss=1.513, pursuer_7/loss=1.469, rew=6.09]                             


Epoch #27: test_reward: -16.012750 ± 15.361034, best_reward: 6.750750 ± 13.341351 in #22


Epoch #28: 10001it [02:09, 77.48it/s, env_step=280000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.670, pursuer_1/loss=1.455, pursuer_2/loss=1.595, pursuer_3/loss=1.686, pursuer_4/loss=1.455, pursuer_5/loss=1.591, pursuer_6/loss=1.525, pursuer_7/loss=1.534, rew=8.75]                             


Epoch #28: test_reward: -10.468500 ± 19.851819, best_reward: 6.750750 ± 13.341351 in #22


Epoch #29: 10001it [02:11, 76.31it/s, env_step=290000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.537, pursuer_1/loss=1.499, pursuer_2/loss=1.595, pursuer_3/loss=1.648, pursuer_4/loss=1.662, pursuer_5/loss=1.727, pursuer_6/loss=1.674, pursuer_7/loss=1.504, rew=10.39]                             


Epoch #29: test_reward: -8.018375 ± 15.966239, best_reward: 6.750750 ± 13.341351 in #22


Epoch #30: 10001it [02:10, 76.53it/s, env_step=300000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.538, pursuer_1/loss=1.527, pursuer_2/loss=1.502, pursuer_3/loss=1.500, pursuer_4/loss=1.443, pursuer_5/loss=1.528, pursuer_6/loss=1.429, pursuer_7/loss=1.406, rew=16.88]                             


Steps Policy Saved  90
Epoch #30: test_reward: -8.750875 ± 12.321397, best_reward: 6.750750 ± 13.341351 in #22


Epoch #31: 10001it [02:09, 77.10it/s, env_step=310000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.645, pursuer_1/loss=1.508, pursuer_2/loss=1.582, pursuer_3/loss=1.616, pursuer_4/loss=1.487, pursuer_5/loss=1.594, pursuer_6/loss=1.631, pursuer_7/loss=1.411, rew=10.91]                             


Epoch #31: test_reward: -10.561563 ± 17.338313, best_reward: 6.750750 ± 13.341351 in #22


Epoch #32: 10001it [02:09, 77.08it/s, env_step=320000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.640, pursuer_1/loss=1.694, pursuer_2/loss=1.688, pursuer_3/loss=1.727, pursuer_4/loss=1.658, pursuer_5/loss=1.623, pursuer_6/loss=1.619, pursuer_7/loss=1.606, rew=19.56]                             


Epoch #32: test_reward: 3.750000 ± 16.229900, best_reward: 6.750750 ± 13.341351 in #22


Epoch #33: 10001it [02:07, 78.23it/s, env_step=330000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.671, pursuer_1/loss=1.553, pursuer_2/loss=1.544, pursuer_3/loss=1.661, pursuer_4/loss=1.709, pursuer_5/loss=1.655, pursuer_6/loss=1.580, pursuer_7/loss=1.582, rew=11.49]                             


Epoch #33: test_reward: -4.283938 ± 15.394075, best_reward: 6.750750 ± 13.341351 in #22


Epoch #34: 10001it [02:09, 77.38it/s, env_step=340000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.666, pursuer_1/loss=1.630, pursuer_2/loss=1.715, pursuer_3/loss=1.723, pursuer_4/loss=1.593, pursuer_5/loss=1.747, pursuer_6/loss=1.590, pursuer_7/loss=1.601, rew=19.92]                             


Epoch #34: test_reward: -6.868625 ± 16.733056, best_reward: 6.750750 ± 13.341351 in #22


Epoch #35: 10001it [02:09, 77.07it/s, env_step=350000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.596, pursuer_1/loss=1.585, pursuer_2/loss=1.591, pursuer_3/loss=1.606, pursuer_4/loss=1.602, pursuer_5/loss=1.672, pursuer_6/loss=1.591, pursuer_7/loss=1.576, rew=17.29]                             


Epoch #35: test_reward: 5.517750 ± 16.842247, best_reward: 6.750750 ± 13.341351 in #22


Epoch #36: 10001it [02:10, 76.81it/s, env_step=360000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.451, pursuer_1/loss=1.534, pursuer_2/loss=1.611, pursuer_3/loss=1.561, pursuer_4/loss=1.604, pursuer_5/loss=1.597, pursuer_6/loss=1.430, pursuer_7/loss=1.414, rew=12.15]                             


Epoch #36: test_reward: -6.620687 ± 14.045081, best_reward: 6.750750 ± 13.341351 in #22


Epoch #37: 10001it [02:08, 77.72it/s, env_step=370000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.669, pursuer_1/loss=1.637, pursuer_2/loss=1.526, pursuer_3/loss=1.703, pursuer_4/loss=1.563, pursuer_5/loss=1.784, pursuer_6/loss=1.608, pursuer_7/loss=1.600, rew=20.22]                             


Epoch #37: test_reward: -1.907813 ± 19.073545, best_reward: 6.750750 ± 13.341351 in #22


Epoch #38: 10001it [02:08, 77.84it/s, env_step=380000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.708, pursuer_1/loss=1.635, pursuer_2/loss=1.655, pursuer_3/loss=1.698, pursuer_4/loss=1.628, pursuer_5/loss=1.765, pursuer_6/loss=1.791, pursuer_7/loss=1.538, rew=12.35]                             


Epoch #38: test_reward: -2.833438 ± 17.357083, best_reward: 6.750750 ± 13.341351 in #22


Epoch #39: 10001it [02:31, 66.14it/s, env_step=390000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.778, pursuer_1/loss=1.642, pursuer_2/loss=1.731, pursuer_3/loss=1.709, pursuer_4/loss=1.682, pursuer_5/loss=1.820, pursuer_6/loss=1.810, pursuer_7/loss=1.606, rew=21.91]                             


Epoch #39: test_reward: -2.750125 ± 13.233260, best_reward: 6.750750 ± 13.341351 in #22


Epoch #40: 10001it [02:31, 65.88it/s, env_step=400000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.534, pursuer_1/loss=1.737, pursuer_2/loss=1.701, pursuer_3/loss=1.590, pursuer_4/loss=1.678, pursuer_5/loss=1.708, pursuer_6/loss=1.603, pursuer_7/loss=1.794, rew=7.58]                             


Steps Policy Saved  120
Epoch #40: test_reward: -2.706875 ± 15.428162, best_reward: 6.750750 ± 13.341351 in #22


Epoch #41: 10001it [02:32, 65.72it/s, env_step=410000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.840, pursuer_1/loss=1.682, pursuer_2/loss=1.594, pursuer_3/loss=1.638, pursuer_4/loss=1.570, pursuer_5/loss=1.565, pursuer_6/loss=1.839, pursuer_7/loss=1.567, rew=8.90]                             


Epoch #41: test_reward: -1.006875 ± 13.711726, best_reward: 6.750750 ± 13.341351 in #22


Epoch #42: 10001it [02:29, 66.68it/s, env_step=420000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.620, pursuer_1/loss=1.614, pursuer_2/loss=1.688, pursuer_3/loss=1.707, pursuer_4/loss=1.752, pursuer_5/loss=1.747, pursuer_6/loss=1.594, pursuer_7/loss=1.569, rew=7.76]                             


Epoch #42: test_reward: 3.751500 ± 19.793426, best_reward: 6.750750 ± 13.341351 in #22


Epoch #43: 10001it [02:31, 66.04it/s, env_step=430000, len=500, n/ep=10, n/st=50, pursuer_0/loss=1.775, pursuer_1/loss=1.687, pursuer_2/loss=1.764, pursuer_3/loss=1.834, pursuer_4/loss=1.776, pursuer_5/loss=1.734, pursuer_6/loss=1.591, pursuer_7/loss=1.676, rew=24.43]                             


Epoch #43: test_reward: 1.253937 ± 16.979911, best_reward: 6.750750 ± 13.341351 in #22


Epoch #44: 10001it [02:26, 68.44it/s, env_step=440000, len=500, n/ep=9, n/st=50, pursuer_0/loss=1.660, pursuer_1/loss=1.606, pursuer_2/loss=1.714, pursuer_3/loss=1.712, pursuer_4/loss=1.686, pursuer_5/loss=1.688, pursuer_6/loss=1.551, pursuer_7/loss=1.925, rew=22.83]                             


Epoch #44: test_reward: -27.315000 ± 19.885400, best_reward: 6.750750 ± 13.341351 in #22


Epoch #45: 10001it [02:28, 67.42it/s, env_step=450000, len=500, n/ep=9, n/st=50, pursuer_0/loss=1.708, pursuer_1/loss=1.952, pursuer_2/loss=1.735, pursuer_3/loss=1.615, pursuer_4/loss=1.584, pursuer_5/loss=1.887, pursuer_6/loss=1.657, pursuer_7/loss=1.678, rew=19.10]                             


Epoch #45: test_reward: -0.528500 ± 13.890763, best_reward: 6.750750 ± 13.341351 in #22


Epoch #46: 10001it [02:30, 66.64it/s, env_step=460000, len=500, n/ep=9, n/st=50, pursuer_0/loss=1.808, pursuer_1/loss=1.764, pursuer_2/loss=1.834, pursuer_3/loss=1.806, pursuer_4/loss=1.704, pursuer_5/loss=1.769, pursuer_6/loss=1.856, pursuer_7/loss=1.710, rew=26.34]                             


Epoch #46: test_reward: 2.919312 ± 13.404610, best_reward: 6.750750 ± 13.341351 in #22


Epoch #47: 10001it [02:31, 66.13it/s, env_step=470000, len=500, n/ep=9, n/st=50, pursuer_0/loss=1.726, pursuer_1/loss=1.813, pursuer_2/loss=1.643, pursuer_3/loss=1.803, pursuer_4/loss=1.781, pursuer_5/loss=1.742, pursuer_6/loss=1.629, pursuer_7/loss=1.584, rew=10.52]                             


Epoch #47: test_reward: 2.380750 ± 13.615101, best_reward: 6.750750 ± 13.341351 in #22


Epoch #48: 10001it [02:26, 68.15it/s, env_step=480000, len=500, n/ep=9, n/st=50, pursuer_0/loss=1.802, pursuer_1/loss=1.834, pursuer_2/loss=1.668, pursuer_3/loss=1.743, pursuer_4/loss=1.688, pursuer_5/loss=1.736, pursuer_6/loss=1.870, pursuer_7/loss=1.841, rew=19.99]                             


Epoch #48: test_reward: 5.555562 ± 18.126222, best_reward: 6.750750 ± 13.341351 in #22


Epoch #49: 10001it [02:29, 66.83it/s, env_step=490000, len=500, n/ep=9, n/st=50, pursuer_0/loss=1.865, pursuer_1/loss=1.846, pursuer_2/loss=1.751, pursuer_3/loss=1.926, pursuer_4/loss=1.640, pursuer_5/loss=1.861, pursuer_6/loss=1.834, pursuer_7/loss=1.812, rew=24.95]                             


Epoch #49: test_reward: -8.461438 ± 14.121079, best_reward: 6.750750 ± 13.341351 in #22


Epoch #50: 10001it [02:28, 67.18it/s, env_step=500000, len=500, n/ep=8, n/st=50, pursuer_0/loss=1.781, pursuer_1/loss=1.863, pursuer_2/loss=1.838, pursuer_3/loss=1.842, pursuer_4/loss=1.794, pursuer_5/loss=1.839, pursuer_6/loss=1.731, pursuer_7/loss=1.660, rew=24.99]                             


Epoch #50: test_reward: -10.264312 ± 15.357021, best_reward: 6.750750 ± 13.341351 in #22


Epoch #51: 10001it [02:28, 67.41it/s, env_step=510000, len=500, n/ep=8, n/st=50, pursuer_0/loss=2.025, pursuer_1/loss=1.919, pursuer_2/loss=1.959, pursuer_3/loss=1.828, pursuer_4/loss=1.779, pursuer_5/loss=1.890, pursuer_6/loss=1.851, pursuer_7/loss=1.853, rew=33.45]                             


Epoch #51: test_reward: -5.885938 ± 18.816643, best_reward: 6.750750 ± 13.341351 in #22


Epoch #52: 10001it [02:30, 66.50it/s, env_step=520000, len=500, n/ep=8, n/st=50, pursuer_0/loss=1.753, pursuer_1/loss=1.901, pursuer_2/loss=1.827, pursuer_3/loss=1.699, pursuer_4/loss=1.727, pursuer_5/loss=1.790, pursuer_6/loss=1.863, pursuer_7/loss=1.648, rew=16.24]                             


Steps Policy Saved  180
Epoch #52: test_reward: 5.926875 ± 19.341033, best_reward: 6.750750 ± 13.341351 in #22


Epoch #53: 10001it [02:29, 66.95it/s, env_step=530000, len=500, n/ep=8, n/st=50, pursuer_0/loss=1.988, pursuer_1/loss=2.042, pursuer_2/loss=1.786, pursuer_3/loss=2.002, pursuer_4/loss=1.939, pursuer_5/loss=2.075, pursuer_6/loss=1.859, pursuer_7/loss=1.792, rew=5.74]                              


Epoch #53: test_reward: -15.856188 ± 16.736367, best_reward: 6.750750 ± 13.341351 in #22


Epoch #54: 10001it [02:29, 66.73it/s, env_step=540000, len=500, n/ep=7, n/st=50, pursuer_0/loss=1.931, pursuer_1/loss=1.892, pursuer_2/loss=1.975, pursuer_3/loss=2.151, pursuer_4/loss=1.806, pursuer_5/loss=1.945, pursuer_6/loss=1.831, pursuer_7/loss=2.112, rew=15.63]                             


Epoch #54: test_reward: -0.023063 ± 15.113483, best_reward: 6.750750 ± 13.341351 in #22


Epoch #55: 10001it [02:25, 68.74it/s, env_step=550000, len=500, n/ep=7, n/st=50, pursuer_0/loss=1.866, pursuer_1/loss=1.810, pursuer_2/loss=1.864, pursuer_3/loss=1.935, pursuer_4/loss=1.796, pursuer_5/loss=1.860, pursuer_6/loss=1.874, pursuer_7/loss=1.973, rew=18.73]                             


Epoch #55: test_reward: -7.493875 ± 18.072836, best_reward: 6.750750 ± 13.341351 in #22


Epoch #56: 10001it [02:28, 67.44it/s, env_step=560000, len=500, n/ep=7, n/st=50, pursuer_0/loss=1.931, pursuer_1/loss=1.898, pursuer_2/loss=1.927, pursuer_3/loss=1.888, pursuer_4/loss=2.044, pursuer_5/loss=1.864, pursuer_6/loss=2.011, pursuer_7/loss=2.138, rew=16.11]                             


Epoch #56: test_reward: -12.518937 ± 13.710258, best_reward: 6.750750 ± 13.341351 in #22


Epoch #57: 10001it [02:30, 66.63it/s, env_step=570000, len=500, n/ep=7, n/st=50, pursuer_0/loss=2.053, pursuer_1/loss=2.025, pursuer_2/loss=2.124, pursuer_3/loss=2.034, pursuer_4/loss=1.997, pursuer_5/loss=1.913, pursuer_6/loss=2.165, pursuer_7/loss=2.307, rew=20.11]                             


Epoch #57: test_reward: -17.887500 ± 16.387879, best_reward: 6.750750 ± 13.341351 in #22


Epoch #58: 10001it [02:28, 67.35it/s, env_step=580000, len=500, n/ep=7, n/st=50, pursuer_0/loss=1.906, pursuer_1/loss=1.833, pursuer_2/loss=2.007, pursuer_3/loss=1.972, pursuer_4/loss=1.908, pursuer_5/loss=1.969, pursuer_6/loss=1.915, pursuer_7/loss=1.889, rew=11.17]                             


Best Saved Rew 233
Epoch #58: test_reward: 7.865812 ± 20.093255, best_reward: 7.865812 ± 20.093255 in #58


Epoch #59: 10001it [02:25, 68.93it/s, env_step=590000, len=500, n/ep=6, n/st=50, pursuer_0/loss=1.939, pursuer_1/loss=2.209, pursuer_2/loss=2.054, pursuer_3/loss=1.904, pursuer_4/loss=2.133, pursuer_5/loss=2.042, pursuer_6/loss=2.156, pursuer_7/loss=2.059, rew=23.69]                             


Epoch #59: test_reward: -12.571062 ± 14.792495, best_reward: 7.865812 ± 20.093255 in #58


Epoch #60: 10001it [02:30, 66.28it/s, env_step=600000, len=500, n/ep=5, n/st=50, pursuer_0/loss=1.976, pursuer_1/loss=1.995, pursuer_2/loss=1.930, pursuer_3/loss=1.853, pursuer_4/loss=1.865, pursuer_5/loss=1.851, pursuer_6/loss=1.911, pursuer_7/loss=1.924, rew=30.55]                             


Epoch #60: test_reward: 3.517687 ± 20.429472, best_reward: 7.865812 ± 20.093255 in #58


Epoch #61: 10001it [02:30, 66.28it/s, env_step=610000, len=500, n/ep=4, n/st=50, pursuer_0/loss=2.071, pursuer_1/loss=1.983, pursuer_2/loss=2.096, pursuer_3/loss=2.110, pursuer_4/loss=2.178, pursuer_5/loss=2.128, pursuer_6/loss=2.035, pursuer_7/loss=2.154, rew=1.72]                              


Epoch #61: test_reward: 0.526000 ± 15.997892, best_reward: 7.865812 ± 20.093255 in #58


Epoch #62: 10001it [02:30, 66.54it/s, env_step=620000, len=500, n/ep=4, n/st=50, pursuer_0/loss=2.004, pursuer_1/loss=2.107, pursuer_2/loss=2.172, pursuer_3/loss=2.194, pursuer_4/loss=2.215, pursuer_5/loss=2.065, pursuer_6/loss=1.914, pursuer_7/loss=2.088, rew=21.93]                             


Epoch #62: test_reward: -13.977187 ± 13.511298, best_reward: 7.865812 ± 20.093255 in #58


Epoch #63: 10001it [02:26, 68.24it/s, env_step=630000, len=500, n/ep=4, n/st=50, pursuer_0/loss=1.855, pursuer_1/loss=1.878, pursuer_2/loss=2.041, pursuer_3/loss=1.991, pursuer_4/loss=1.909, pursuer_5/loss=2.013, pursuer_6/loss=2.239, pursuer_7/loss=2.179, rew=17.97]                             


Epoch #63: test_reward: 1.579312 ± 21.460778, best_reward: 7.865812 ± 20.093255 in #58


Epoch #64: 10001it [02:28, 67.14it/s, env_step=640000, len=500, n/ep=4, n/st=50, pursuer_0/loss=1.973, pursuer_1/loss=1.996, pursuer_2/loss=2.182, pursuer_3/loss=2.008, pursuer_4/loss=2.110, pursuer_5/loss=2.105, pursuer_6/loss=2.089, pursuer_7/loss=2.031, rew=14.23]                             


Epoch #64: test_reward: 2.945625 ± 16.284944, best_reward: 7.865812 ± 20.093255 in #58


Epoch #65: 10001it [02:27, 67.92it/s, env_step=650000, len=500, n/ep=4, n/st=50, pursuer_0/loss=2.066, pursuer_1/loss=1.912, pursuer_2/loss=1.857, pursuer_3/loss=1.825, pursuer_4/loss=2.162, pursuer_5/loss=2.228, pursuer_6/loss=2.041, pursuer_7/loss=2.119, rew=31.60]                             


Steps Policy Saved  320
Epoch #65: test_reward: 3.334000 ± 15.107440, best_reward: 7.865812 ± 20.093255 in #58


Epoch #66: 10001it [02:27, 67.61it/s, env_step=660000, len=500, n/ep=4, n/st=50, pursuer_0/loss=1.961, pursuer_1/loss=2.091, pursuer_2/loss=1.996, pursuer_3/loss=1.974, pursuer_4/loss=2.063, pursuer_5/loss=2.036, pursuer_6/loss=1.905, pursuer_7/loss=2.075, rew=31.24]                             


Epoch #66: test_reward: -1.258063 ± 16.715085, best_reward: 7.865812 ± 20.093255 in #58


Epoch #67: 10001it [02:28, 67.14it/s, env_step=670000, len=500, n/ep=3, n/st=50, pursuer_0/loss=1.992, pursuer_1/loss=2.241, pursuer_2/loss=2.128, pursuer_3/loss=1.960, pursuer_4/loss=2.066, pursuer_5/loss=2.133, pursuer_6/loss=2.021, pursuer_7/loss=2.106, rew=18.44]                             


Epoch #67: test_reward: -1.527938 ± 17.982678, best_reward: 7.865812 ± 20.093255 in #58


Epoch #68: 10001it [02:27, 67.94it/s, env_step=680000, len=500, n/ep=2, n/st=50, pursuer_0/loss=2.208, pursuer_1/loss=2.193, pursuer_2/loss=2.222, pursuer_3/loss=2.290, pursuer_4/loss=2.176, pursuer_5/loss=2.213, pursuer_6/loss=2.092, pursuer_7/loss=2.281, rew=48.67]                             


Epoch #68: test_reward: -1.891000 ± 20.020104, best_reward: 7.865812 ± 20.093255 in #58


Epoch #69: 10001it [02:29, 66.79it/s, env_step=690000, len=500, n/ep=2, n/st=50, pursuer_0/loss=2.058, pursuer_1/loss=2.294, pursuer_2/loss=2.240, pursuer_3/loss=2.281, pursuer_4/loss=2.315, pursuer_5/loss=1.956, pursuer_6/loss=2.130, pursuer_7/loss=2.102, rew=-7.52]                             


Epoch #69: test_reward: -3.824875 ± 20.234613, best_reward: 7.865812 ± 20.093255 in #58


Epoch #70: 10001it [02:27, 67.76it/s, env_step=700000, len=500, n/ep=2, n/st=50, pursuer_0/loss=1.968, pursuer_1/loss=1.904, pursuer_2/loss=1.845, pursuer_3/loss=2.053, pursuer_4/loss=2.167, pursuer_5/loss=2.013, pursuer_6/loss=2.037, pursuer_7/loss=1.927, rew=37.04]                             


Best Saved Rew 393
Epoch #70: test_reward: 10.023250 ± 19.830968, best_reward: 10.023250 ± 19.830968 in #70


Epoch #71: 10001it [02:25, 68.68it/s, env_step=710000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.310, pursuer_1/loss=2.318, pursuer_2/loss=2.291, pursuer_3/loss=2.241, pursuer_4/loss=2.382, pursuer_5/loss=2.080, pursuer_6/loss=2.245, pursuer_7/loss=2.355, rew=1.87]                             


Epoch #71: test_reward: -1.056813 ± 13.756813, best_reward: 10.023250 ± 19.830968 in #70


Epoch #72: 10001it [02:28, 67.53it/s, env_step=720000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.059, pursuer_1/loss=2.167, pursuer_2/loss=2.058, pursuer_3/loss=1.939, pursuer_4/loss=2.110, pursuer_5/loss=2.034, pursuer_6/loss=2.162, pursuer_7/loss=2.385, rew=11.32]                             


Epoch #72: test_reward: -0.555125 ± 15.194031, best_reward: 10.023250 ± 19.830968 in #70


Epoch #73: 10001it [02:25, 68.68it/s, env_step=730000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.236, pursuer_1/loss=2.104, pursuer_2/loss=2.114, pursuer_3/loss=2.261, pursuer_4/loss=1.992, pursuer_5/loss=2.151, pursuer_6/loss=2.131, pursuer_7/loss=2.102, rew=25.99]                             


Epoch #73: test_reward: -4.427875 ± 15.556729, best_reward: 10.023250 ± 19.830968 in #70


Epoch #74: 10001it [02:28, 67.31it/s, env_step=740000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.301, pursuer_1/loss=2.258, pursuer_2/loss=2.377, pursuer_3/loss=2.136, pursuer_4/loss=2.262, pursuer_5/loss=2.190, pursuer_6/loss=2.212, pursuer_7/loss=2.142, rew=-3.59]                             


Best Saved Rew 465
Epoch #74: test_reward: 13.985062 ± 25.060428, best_reward: 13.985062 ± 25.060428 in #74


Epoch #75: 10001it [02:27, 67.69it/s, env_step=750000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.080, pursuer_1/loss=2.348, pursuer_2/loss=2.235, pursuer_3/loss=2.076, pursuer_4/loss=2.165, pursuer_5/loss=2.027, pursuer_6/loss=2.312, pursuer_7/loss=2.166, rew=21.45]                             


Epoch #75: test_reward: 5.688250 ± 16.178163, best_reward: 13.985062 ± 25.060428 in #74


Epoch #76: 10001it [02:27, 67.81it/s, env_step=760000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.247, pursuer_1/loss=2.183, pursuer_2/loss=2.428, pursuer_3/loss=2.167, pursuer_4/loss=2.215, pursuer_5/loss=2.370, pursuer_6/loss=2.275, pursuer_7/loss=2.045, rew=21.26]                             


Epoch #76: test_reward: 8.531687 ± 15.218071, best_reward: 13.985062 ± 25.060428 in #74


Epoch #77: 10001it [02:25, 68.59it/s, env_step=770000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.250, pursuer_1/loss=2.059, pursuer_2/loss=2.104, pursuer_3/loss=2.204, pursuer_4/loss=2.132, pursuer_5/loss=2.228, pursuer_6/loss=2.073, pursuer_7/loss=2.226, rew=34.08]                             


Epoch #77: test_reward: 7.243500 ± 22.510380, best_reward: 13.985062 ± 25.060428 in #74


Epoch #78: 10001it [02:24, 69.45it/s, env_step=780000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.202, pursuer_1/loss=2.274, pursuer_2/loss=2.119, pursuer_3/loss=2.202, pursuer_4/loss=2.256, pursuer_5/loss=2.121, pursuer_6/loss=2.234, pursuer_7/loss=2.218, rew=61.10]                             


Epoch #78: test_reward: -5.348375 ± 11.924846, best_reward: 13.985062 ± 25.060428 in #74


Epoch #79: 10001it [02:24, 68.98it/s, env_step=790000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.246, pursuer_1/loss=2.271, pursuer_2/loss=2.325, pursuer_3/loss=2.192, pursuer_4/loss=2.278, pursuer_5/loss=2.396, pursuer_6/loss=2.072, pursuer_7/loss=2.143, rew=-9.16]                             


Epoch #79: test_reward: 3.605312 ± 19.657935, best_reward: 13.985062 ± 25.060428 in #74


Epoch #80: 10001it [02:28, 67.23it/s, env_step=800000, len=459, n/ep=0, n/st=50, pursuer_0/loss=2.198, pursuer_1/loss=2.190, pursuer_2/loss=2.268, pursuer_3/loss=2.206, pursuer_4/loss=2.154, pursuer_5/loss=2.259, pursuer_6/loss=2.193, pursuer_7/loss=2.232, rew=35.52]                             


Epoch #80: test_reward: 11.739500 ± 21.166475, best_reward: 13.985062 ± 25.060428 in #74


Epoch #81: 10001it [02:27, 67.69it/s, env_step=810000, len=343, n/ep=0, n/st=50, pursuer_0/loss=2.305, pursuer_1/loss=2.327, pursuer_2/loss=2.319, pursuer_3/loss=2.272, pursuer_4/loss=2.381, pursuer_5/loss=2.374, pursuer_6/loss=2.358, pursuer_7/loss=2.299, rew=12.00]                             


Epoch #81: test_reward: 3.315437 ± 25.796991, best_reward: 13.985062 ± 25.060428 in #74


Epoch #82: 10001it [02:24, 69.01it/s, env_step=820000, len=470, n/ep=0, n/st=50, pursuer_0/loss=2.348, pursuer_1/loss=2.310, pursuer_2/loss=2.106, pursuer_3/loss=2.337, pursuer_4/loss=2.131, pursuer_5/loss=2.148, pursuer_6/loss=2.239, pursuer_7/loss=2.102, rew=14.18]                             


Steps Policy Saved  620
Epoch #82: test_reward: 8.025375 ± 19.372222, best_reward: 13.985062 ± 25.060428 in #74


Epoch #83: 10001it [02:27, 67.84it/s, env_step=830000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.296, pursuer_1/loss=2.399, pursuer_2/loss=2.302, pursuer_3/loss=2.351, pursuer_4/loss=2.447, pursuer_5/loss=2.244, pursuer_6/loss=2.319, pursuer_7/loss=2.418, rew=36.91]                             


Epoch #83: test_reward: -1.380625 ± 19.876429, best_reward: 13.985062 ± 25.060428 in #74


Epoch #84: 10001it [02:26, 68.15it/s, env_step=840000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.340, pursuer_1/loss=2.282, pursuer_2/loss=2.421, pursuer_3/loss=2.556, pursuer_4/loss=2.206, pursuer_5/loss=2.347, pursuer_6/loss=2.390, pursuer_7/loss=2.531, rew=12.32]                             


Epoch #84: test_reward: -8.685375 ± 17.509753, best_reward: 13.985062 ± 25.060428 in #74


Epoch #85: 10001it [02:27, 67.82it/s, env_step=850000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.346, pursuer_1/loss=2.399, pursuer_2/loss=2.483, pursuer_3/loss=2.289, pursuer_4/loss=2.224, pursuer_5/loss=2.260, pursuer_6/loss=2.339, pursuer_7/loss=2.330, rew=61.35]                             


Epoch #85: test_reward: 11.630125 ± 23.166131, best_reward: 13.985062 ± 25.060428 in #74


Epoch #86: 10001it [02:26, 68.43it/s, env_step=860000, len=460, n/ep=0, n/st=50, pursuer_0/loss=2.265, pursuer_1/loss=2.392, pursuer_2/loss=2.320, pursuer_3/loss=2.276, pursuer_4/loss=2.201, pursuer_5/loss=2.287, pursuer_6/loss=2.282, pursuer_7/loss=2.238, rew=-28.93]                             


Epoch #86: test_reward: 5.757375 ± 24.240847, best_reward: 13.985062 ± 25.060428 in #74


Epoch #87: 10001it [02:25, 68.60it/s, env_step=870000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.321, pursuer_1/loss=2.144, pursuer_2/loss=2.191, pursuer_3/loss=2.070, pursuer_4/loss=2.004, pursuer_5/loss=2.361, pursuer_6/loss=2.193, pursuer_7/loss=2.087, rew=45.87]                             


Epoch #87: test_reward: 11.318750 ± 19.891951, best_reward: 13.985062 ± 25.060428 in #74


Epoch #88: 10001it [02:27, 67.64it/s, env_step=880000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.494, pursuer_1/loss=2.527, pursuer_2/loss=2.246, pursuer_3/loss=2.475, pursuer_4/loss=2.505, pursuer_5/loss=2.259, pursuer_6/loss=2.467, pursuer_7/loss=2.241, rew=12.44]                             


Steps Policy Saved  740
Epoch #88: test_reward: 4.202000 ± 15.782939, best_reward: 13.985062 ± 25.060428 in #74


Epoch #89: 10001it [02:26, 68.34it/s, env_step=890000, len=400, n/ep=1, n/st=50, pursuer_0/loss=2.316, pursuer_1/loss=2.394, pursuer_2/loss=2.458, pursuer_3/loss=2.446, pursuer_4/loss=2.382, pursuer_5/loss=2.440, pursuer_6/loss=2.350, pursuer_7/loss=2.552, rew=66.09]                             


Epoch #89: test_reward: 9.383125 ± 18.296283, best_reward: 13.985062 ± 25.060428 in #74


Epoch #90: 10001it [02:28, 67.20it/s, env_step=900000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.275, pursuer_1/loss=2.278, pursuer_2/loss=2.399, pursuer_3/loss=2.269, pursuer_4/loss=2.309, pursuer_5/loss=2.289, pursuer_6/loss=2.101, pursuer_7/loss=2.338, rew=41.42]                             


Epoch #90: test_reward: -3.562375 ± 14.403642, best_reward: 13.985062 ± 25.060428 in #74


Epoch #91: 10001it [02:27, 67.64it/s, env_step=910000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.281, pursuer_1/loss=2.230, pursuer_2/loss=2.257, pursuer_3/loss=2.331, pursuer_4/loss=2.519, pursuer_5/loss=2.201, pursuer_6/loss=2.302, pursuer_7/loss=2.338, rew=26.50]                             


Epoch #91: test_reward: 3.680437 ± 22.806144, best_reward: 13.985062 ± 25.060428 in #74


Epoch #92: 10001it [02:25, 68.58it/s, env_step=920000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.303, pursuer_1/loss=2.423, pursuer_2/loss=2.196, pursuer_3/loss=2.030, pursuer_4/loss=2.320, pursuer_5/loss=2.625, pursuer_6/loss=2.080, pursuer_7/loss=2.258, rew=37.20]                             


Epoch #92: test_reward: 5.754000 ± 17.077263, best_reward: 13.985062 ± 25.060428 in #74


Epoch #93: 10001it [02:26, 68.18it/s, env_step=930000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.343, pursuer_1/loss=2.514, pursuer_2/loss=2.400, pursuer_3/loss=2.398, pursuer_4/loss=2.605, pursuer_5/loss=2.487, pursuer_6/loss=2.423, pursuer_7/loss=2.419, rew=31.56]                             


Epoch #93: test_reward: 11.964500 ± 17.974599, best_reward: 13.985062 ± 25.060428 in #74


Epoch #94: 10001it [02:26, 68.45it/s, env_step=940000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.301, pursuer_1/loss=2.147, pursuer_2/loss=2.421, pursuer_3/loss=2.422, pursuer_4/loss=2.360, pursuer_5/loss=2.417, pursuer_6/loss=2.320, pursuer_7/loss=2.342, rew=36.21]                             


Best Saved Rew 867
Epoch #94: test_reward: 14.507250 ± 18.413203, best_reward: 14.507250 ± 18.413203 in #94


Epoch #95: 10001it [02:29, 67.04it/s, env_step=950000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.410, pursuer_1/loss=2.370, pursuer_2/loss=2.275, pursuer_3/loss=2.393, pursuer_4/loss=2.253, pursuer_5/loss=2.497, pursuer_6/loss=2.416, pursuer_7/loss=2.373, rew=1.99]                              


Epoch #95: test_reward: -0.694875 ± 24.170725, best_reward: 14.507250 ± 18.413203 in #94


Epoch #96: 10001it [02:25, 68.64it/s, env_step=960000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.199, pursuer_1/loss=2.319, pursuer_2/loss=2.145, pursuer_3/loss=2.263, pursuer_4/loss=2.306, pursuer_5/loss=2.160, pursuer_6/loss=2.204, pursuer_7/loss=2.327, rew=46.39]                             


Epoch #96: test_reward: 0.775250 ± 21.675034, best_reward: 14.507250 ± 18.413203 in #94


Epoch #97: 10001it [02:27, 68.00it/s, env_step=970000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.499, pursuer_1/loss=2.349, pursuer_2/loss=2.331, pursuer_3/loss=2.315, pursuer_4/loss=2.389, pursuer_5/loss=2.348, pursuer_6/loss=2.364, pursuer_7/loss=2.224, rew=-3.42]                             


Steps Policy Saved  930
Epoch #97: test_reward: 2.908187 ± 19.230060, best_reward: 14.507250 ± 18.413203 in #94


Epoch #98: 10001it [02:27, 67.93it/s, env_step=980000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.560, pursuer_1/loss=2.431, pursuer_2/loss=2.241, pursuer_3/loss=2.318, pursuer_4/loss=2.429, pursuer_5/loss=2.352, pursuer_6/loss=2.321, pursuer_7/loss=2.354, rew=6.17]                             


Epoch #98: test_reward: -0.646813 ± 19.122513, best_reward: 14.507250 ± 18.413203 in #94


Epoch #99: 10001it [02:26, 68.27it/s, env_step=990000, len=399, n/ep=0, n/st=50, pursuer_0/loss=2.199, pursuer_1/loss=2.286, pursuer_2/loss=2.290, pursuer_3/loss=2.182, pursuer_4/loss=2.357, pursuer_5/loss=2.332, pursuer_6/loss=2.076, pursuer_7/loss=2.149, rew=11.04]                             


Epoch #99: test_reward: -16.757438 ± 19.969773, best_reward: 14.507250 ± 18.413203 in #94


Epoch #100: 10001it [02:25, 68.83it/s, env_step=1000000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.532, pursuer_1/loss=2.386, pursuer_2/loss=2.407, pursuer_3/loss=2.324, pursuer_4/loss=2.509, pursuer_5/loss=2.474, pursuer_6/loss=2.370, pursuer_7/loss=2.296, rew=11.86]                             


Epoch #100: test_reward: 7.796062 ± 23.712844, best_reward: 14.507250 ± 18.413203 in #94


Epoch #101: 10001it [02:24, 69.36it/s, env_step=1010000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.420, pursuer_1/loss=2.502, pursuer_2/loss=2.348, pursuer_3/loss=2.390, pursuer_4/loss=2.429, pursuer_5/loss=2.410, pursuer_6/loss=2.257, pursuer_7/loss=2.416, rew=11.59]                             


Epoch #101: test_reward: 2.168875 ± 21.130416, best_reward: 14.507250 ± 18.413203 in #94


Epoch #102: 10001it [02:26, 68.31it/s, env_step=1020000, len=487, n/ep=0, n/st=50, pursuer_0/loss=2.477, pursuer_1/loss=2.531, pursuer_2/loss=2.526, pursuer_3/loss=2.620, pursuer_4/loss=2.377, pursuer_5/loss=2.540, pursuer_6/loss=2.383, pursuer_7/loss=2.488, rew=42.42]                             


Epoch #102: test_reward: -2.568312 ± 16.238017, best_reward: 14.507250 ± 18.413203 in #94


Epoch #103: 10001it [02:26, 68.07it/s, env_step=1030000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.321, pursuer_1/loss=2.494, pursuer_2/loss=2.214, pursuer_3/loss=2.402, pursuer_4/loss=2.373, pursuer_5/loss=2.381, pursuer_6/loss=2.512, pursuer_7/loss=2.617, rew=31.65]                             


Epoch #103: test_reward: 0.026563 ± 11.581209, best_reward: 14.507250 ± 18.413203 in #94


Epoch #104: 10001it [02:27, 67.79it/s, env_step=1040000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.352, pursuer_1/loss=2.322, pursuer_2/loss=2.230, pursuer_3/loss=2.514, pursuer_4/loss=2.364, pursuer_5/loss=2.141, pursuer_6/loss=2.305, pursuer_7/loss=2.397, rew=55.96]                             


Epoch #104: test_reward: -7.757750 ± 19.225668, best_reward: 14.507250 ± 18.413203 in #94


Epoch #105: 10001it [02:28, 67.45it/s, env_step=1050000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.351, pursuer_1/loss=2.380, pursuer_2/loss=2.467, pursuer_3/loss=2.455, pursuer_4/loss=2.336, pursuer_5/loss=2.666, pursuer_6/loss=2.436, pursuer_7/loss=2.411, rew=-7.85]                             


Epoch #105: test_reward: 9.193687 ± 23.726467, best_reward: 14.507250 ± 18.413203 in #94


Epoch #106: 10001it [02:26, 68.19it/s, env_step=1060000, len=492, n/ep=0, n/st=50, pursuer_0/loss=2.114, pursuer_1/loss=2.540, pursuer_2/loss=2.525, pursuer_3/loss=2.283, pursuer_4/loss=2.472, pursuer_5/loss=2.570, pursuer_6/loss=2.392, pursuer_7/loss=2.596, rew=47.10]                             


Epoch #106: test_reward: 3.098687 ± 19.345935, best_reward: 14.507250 ± 18.413203 in #94


Epoch #107: 10001it [02:28, 67.43it/s, env_step=1070000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.440, pursuer_1/loss=2.330, pursuer_2/loss=2.338, pursuer_3/loss=2.381, pursuer_4/loss=2.446, pursuer_5/loss=2.494, pursuer_6/loss=2.322, pursuer_7/loss=2.416, rew=51.97]                             


Epoch #107: test_reward: -8.315063 ± 21.007838, best_reward: 14.507250 ± 18.413203 in #94


Epoch #108: 10001it [02:19, 71.47it/s, env_step=1080000, len=436, n/ep=0, n/st=50, pursuer_0/loss=2.428, pursuer_1/loss=2.598, pursuer_2/loss=2.587, pursuer_3/loss=2.424, pursuer_4/loss=2.588, pursuer_5/loss=2.515, pursuer_6/loss=2.380, pursuer_7/loss=2.668, rew=23.06]                             


Epoch #108: test_reward: 1.218688 ± 16.580490, best_reward: 14.507250 ± 18.413203 in #94


Epoch #109: 10001it [02:06, 78.85it/s, env_step=1090000, len=415, n/ep=0, n/st=50, pursuer_0/loss=2.375, pursuer_1/loss=2.550, pursuer_2/loss=2.300, pursuer_3/loss=2.315, pursuer_4/loss=2.711, pursuer_5/loss=2.384, pursuer_6/loss=2.216, pursuer_7/loss=2.244, rew=39.96]                             


Epoch #109: test_reward: -13.656938 ± 14.304873, best_reward: 14.507250 ± 18.413203 in #94


Epoch #110: 10001it [02:01, 82.47it/s, env_step=1100000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.673, pursuer_1/loss=2.461, pursuer_2/loss=2.334, pursuer_3/loss=2.463, pursuer_4/loss=2.500, pursuer_5/loss=2.528, pursuer_6/loss=2.310, pursuer_7/loss=2.492, rew=17.03]                             


Epoch #110: test_reward: -1.501438 ± 16.879849, best_reward: 14.507250 ± 18.413203 in #94


Epoch #111: 10001it [01:58, 84.12it/s, env_step=1110000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.331, pursuer_1/loss=2.315, pursuer_2/loss=2.241, pursuer_3/loss=2.337, pursuer_4/loss=2.598, pursuer_5/loss=2.464, pursuer_6/loss=2.287, pursuer_7/loss=2.473, rew=22.19]                             


Epoch #111: test_reward: -4.296938 ± 24.702745, best_reward: 14.507250 ± 18.413203 in #94


Epoch #112: 10001it [01:57, 84.96it/s, env_step=1120000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.529, pursuer_1/loss=2.347, pursuer_2/loss=2.545, pursuer_3/loss=2.603, pursuer_4/loss=2.259, pursuer_5/loss=2.559, pursuer_6/loss=2.359, pursuer_7/loss=2.486, rew=35.97]                             


Epoch #112: test_reward: 1.989375 ± 24.527634, best_reward: 14.507250 ± 18.413203 in #94


Epoch #113: 10001it [01:57, 85.31it/s, env_step=1130000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.661, pursuer_1/loss=2.485, pursuer_2/loss=2.623, pursuer_3/loss=2.731, pursuer_4/loss=2.701, pursuer_5/loss=2.620, pursuer_6/loss=2.391, pursuer_7/loss=2.506, rew=32.02]                             


Epoch #113: test_reward: 3.557312 ± 16.939076, best_reward: 14.507250 ± 18.413203 in #94


Epoch #114: 10001it [01:56, 85.58it/s, env_step=1140000, len=240, n/ep=0, n/st=50, pursuer_0/loss=2.533, pursuer_1/loss=2.514, pursuer_2/loss=2.497, pursuer_3/loss=2.520, pursuer_4/loss=2.540, pursuer_5/loss=2.380, pursuer_6/loss=2.492, pursuer_7/loss=2.479, rew=32.53]                             


Epoch #114: test_reward: 9.259125 ± 23.022952, best_reward: 14.507250 ± 18.413203 in #94


Epoch #115: 10001it [01:58, 84.53it/s, env_step=1150000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.485, pursuer_1/loss=2.528, pursuer_2/loss=2.644, pursuer_3/loss=2.536, pursuer_4/loss=2.524, pursuer_5/loss=2.561, pursuer_6/loss=2.396, pursuer_7/loss=2.483, rew=32.53]                             


Steps Policy Saved  1310
Best Saved Rew 1311
Epoch #115: test_reward: 23.594750 ± 23.791390, best_reward: 23.594750 ± 23.791390 in #115


Epoch #116: 10001it [01:58, 84.38it/s, env_step=1160000, len=388, n/ep=0, n/st=50, pursuer_0/loss=2.405, pursuer_1/loss=2.365, pursuer_2/loss=2.543, pursuer_3/loss=2.539, pursuer_4/loss=2.511, pursuer_5/loss=2.614, pursuer_6/loss=2.283, pursuer_7/loss=2.374, rew=13.04]                             


Epoch #116: test_reward: 13.729875 ± 20.242036, best_reward: 23.594750 ± 23.791390 in #115


Epoch #117: 10001it [01:59, 83.40it/s, env_step=1170000, len=442, n/ep=0, n/st=50, pursuer_0/loss=2.375, pursuer_1/loss=2.517, pursuer_2/loss=2.324, pursuer_3/loss=2.388, pursuer_4/loss=2.642, pursuer_5/loss=2.531, pursuer_6/loss=2.346, pursuer_7/loss=2.465, rew=39.57]                             


Epoch #117: test_reward: -12.636063 ± 19.915102, best_reward: 23.594750 ± 23.791390 in #115


Epoch #118: 10001it [02:01, 82.40it/s, env_step=1180000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.585, pursuer_1/loss=2.488, pursuer_2/loss=2.305, pursuer_3/loss=2.560, pursuer_4/loss=2.555, pursuer_5/loss=2.449, pursuer_6/loss=2.563, pursuer_7/loss=2.395, rew=22.42]                             


Epoch #118: test_reward: 12.925250 ± 21.222479, best_reward: 23.594750 ± 23.791390 in #115


Epoch #119: 10001it [02:05, 79.70it/s, env_step=1190000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.292, pursuer_1/loss=2.549, pursuer_2/loss=2.314, pursuer_3/loss=2.362, pursuer_4/loss=2.623, pursuer_5/loss=2.482, pursuer_6/loss=2.470, pursuer_7/loss=2.326, rew=37.25]                             


Epoch #119: test_reward: 8.832500 ± 23.479811, best_reward: 23.594750 ± 23.791390 in #115


Epoch #120: 10001it [02:08, 77.98it/s, env_step=1200000, len=479, n/ep=0, n/st=50, pursuer_0/loss=2.488, pursuer_1/loss=2.481, pursuer_2/loss=2.498, pursuer_3/loss=2.408, pursuer_4/loss=2.344, pursuer_5/loss=2.507, pursuer_6/loss=2.587, pursuer_7/loss=2.656, rew=58.53]                             


Epoch #120: test_reward: -4.831125 ± 30.434152, best_reward: 23.594750 ± 23.791390 in #115


Epoch #121: 10001it [02:10, 76.42it/s, env_step=1210000, len=370, n/ep=0, n/st=50, pursuer_0/loss=2.372, pursuer_1/loss=2.411, pursuer_2/loss=2.505, pursuer_3/loss=2.496, pursuer_4/loss=2.672, pursuer_5/loss=2.559, pursuer_6/loss=2.445, pursuer_7/loss=2.482, rew=94.62]                             


Epoch #121: test_reward: 5.413312 ± 19.825167, best_reward: 23.594750 ± 23.791390 in #115


Epoch #122: 10001it [02:11, 75.79it/s, env_step=1220000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.587, pursuer_1/loss=2.727, pursuer_2/loss=2.533, pursuer_3/loss=2.578, pursuer_4/loss=2.524, pursuer_5/loss=2.673, pursuer_6/loss=2.577, pursuer_7/loss=2.485, rew=12.16]                             


Epoch #122: test_reward: 7.615437 ± 23.301279, best_reward: 23.594750 ± 23.791390 in #115


Epoch #123: 10001it [02:18, 72.21it/s, env_step=1230000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.667, pursuer_1/loss=2.726, pursuer_2/loss=2.788, pursuer_3/loss=2.680, pursuer_4/loss=2.865, pursuer_5/loss=2.729, pursuer_6/loss=2.607, pursuer_7/loss=2.685, rew=11.78]                             


Epoch #123: test_reward: 6.280812 ± 21.015923, best_reward: 23.594750 ± 23.791390 in #115


Epoch #124: 10001it [02:27, 68.00it/s, env_step=1240000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.611, pursuer_1/loss=2.317, pursuer_2/loss=2.547, pursuer_3/loss=2.199, pursuer_4/loss=2.297, pursuer_5/loss=2.527, pursuer_6/loss=2.474, pursuer_7/loss=2.418, rew=61.36]                             


Epoch #124: test_reward: 2.016187 ± 22.716651, best_reward: 23.594750 ± 23.791390 in #115


Epoch #125: 10001it [02:26, 68.12it/s, env_step=1250000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.246, pursuer_1/loss=2.470, pursuer_2/loss=2.232, pursuer_3/loss=2.514, pursuer_4/loss=2.544, pursuer_5/loss=2.420, pursuer_6/loss=2.357, pursuer_7/loss=2.342, rew=41.69]                             


Epoch #125: test_reward: 14.462312 ± 20.630052, best_reward: 23.594750 ± 23.791390 in #115


Epoch #126: 10001it [02:29, 67.08it/s, env_step=1260000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.667, pursuer_1/loss=2.626, pursuer_2/loss=2.537, pursuer_3/loss=2.504, pursuer_4/loss=2.740, pursuer_5/loss=2.582, pursuer_6/loss=2.713, pursuer_7/loss=2.562, rew=31.10]                             


Epoch #126: test_reward: 7.931750 ± 20.406074, best_reward: 23.594750 ± 23.791390 in #115


Epoch #127: 10001it [02:25, 68.85it/s, env_step=1270000, len=382, n/ep=0, n/st=50, pursuer_0/loss=2.363, pursuer_1/loss=2.451, pursuer_2/loss=2.411, pursuer_3/loss=2.279, pursuer_4/loss=2.493, pursuer_5/loss=2.651, pursuer_6/loss=2.374, pursuer_7/loss=2.531, rew=48.47]                             


Epoch #127: test_reward: 15.425125 ± 24.479505, best_reward: 23.594750 ± 23.791390 in #115


Epoch #128: 10001it [02:26, 68.10it/s, env_step=1280000, len=449, n/ep=0, n/st=50, pursuer_0/loss=2.612, pursuer_1/loss=2.388, pursuer_2/loss=2.522, pursuer_3/loss=2.727, pursuer_4/loss=2.549, pursuer_5/loss=2.559, pursuer_6/loss=2.304, pursuer_7/loss=2.451, rew=46.33]                             


Epoch #128: test_reward: -0.908438 ± 15.810563, best_reward: 23.594750 ± 23.791390 in #115


Epoch #129: 10001it [02:26, 68.14it/s, env_step=1290000, len=323, n/ep=0, n/st=50, pursuer_0/loss=2.596, pursuer_1/loss=2.531, pursuer_2/loss=2.735, pursuer_3/loss=2.654, pursuer_4/loss=2.435, pursuer_5/loss=2.746, pursuer_6/loss=2.648, pursuer_7/loss=2.505, rew=74.30]                             


Epoch #129: test_reward: 9.097125 ± 22.683415, best_reward: 23.594750 ± 23.791390 in #115


Epoch #130: 10001it [02:26, 68.18it/s, env_step=1300000, len=377, n/ep=0, n/st=50, pursuer_0/loss=2.572, pursuer_1/loss=2.681, pursuer_2/loss=2.571, pursuer_3/loss=2.782, pursuer_4/loss=2.761, pursuer_5/loss=2.616, pursuer_6/loss=2.472, pursuer_7/loss=2.743, rew=19.20]                             


Epoch #130: test_reward: 6.449312 ± 17.114027, best_reward: 23.594750 ± 23.791390 in #115


Epoch #131: 10001it [02:25, 68.94it/s, env_step=1310000, len=491, n/ep=0, n/st=50, pursuer_0/loss=2.596, pursuer_1/loss=2.535, pursuer_2/loss=2.622, pursuer_3/loss=2.534, pursuer_4/loss=2.502, pursuer_5/loss=2.404, pursuer_6/loss=2.473, pursuer_7/loss=2.532, rew=2.84]                             


Epoch #131: test_reward: 8.436312 ± 21.651960, best_reward: 23.594750 ± 23.791390 in #115


Epoch #132: 10001it [02:17, 72.72it/s, env_step=1320000, len=380, n/ep=0, n/st=50, pursuer_0/loss=2.331, pursuer_1/loss=2.572, pursuer_2/loss=2.286, pursuer_3/loss=2.605, pursuer_4/loss=2.564, pursuer_5/loss=2.413, pursuer_6/loss=2.198, pursuer_7/loss=2.274, rew=44.47]                             


Epoch #132: test_reward: -2.795250 ± 17.763371, best_reward: 23.594750 ± 23.791390 in #115


Epoch #133: 10001it [02:20, 71.15it/s, env_step=1330000, len=442, n/ep=0, n/st=50, pursuer_0/loss=2.629, pursuer_1/loss=2.641, pursuer_2/loss=2.647, pursuer_3/loss=2.697, pursuer_4/loss=2.712, pursuer_5/loss=2.581, pursuer_6/loss=2.480, pursuer_7/loss=2.547, rew=47.76]                             


Epoch #133: test_reward: -1.306813 ± 19.933845, best_reward: 23.594750 ± 23.791390 in #115


Epoch #134: 10001it [02:26, 68.21it/s, env_step=1340000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.551, pursuer_1/loss=2.750, pursuer_2/loss=2.699, pursuer_3/loss=2.573, pursuer_4/loss=2.881, pursuer_5/loss=2.451, pursuer_6/loss=2.568, pursuer_7/loss=2.566, rew=61.31]                             


Steps Policy Saved  1720
Epoch #134: test_reward: -5.151125 ± 24.690233, best_reward: 23.594750 ± 23.791390 in #115


Epoch #135: 10001it [02:26, 68.49it/s, env_step=1350000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.556, pursuer_1/loss=2.266, pursuer_2/loss=2.277, pursuer_3/loss=2.246, pursuer_4/loss=2.412, pursuer_5/loss=2.534, pursuer_6/loss=2.538, pursuer_7/loss=2.685, rew=22.30]                             


Epoch #135: test_reward: -6.060750 ± 23.012740, best_reward: 23.594750 ± 23.791390 in #115


Epoch #136: 10001it [02:16, 73.49it/s, env_step=1360000, len=489, n/ep=0, n/st=50, pursuer_0/loss=2.570, pursuer_1/loss=2.784, pursuer_2/loss=2.772, pursuer_3/loss=2.680, pursuer_4/loss=2.497, pursuer_5/loss=2.560, pursuer_6/loss=2.575, pursuer_7/loss=2.383, rew=-2.28]                             


Epoch #136: test_reward: 9.909687 ± 27.732843, best_reward: 23.594750 ± 23.791390 in #115


Epoch #137: 10001it [02:13, 74.80it/s, env_step=1370000, len=311, n/ep=0, n/st=50, pursuer_0/loss=2.777, pursuer_1/loss=2.517, pursuer_2/loss=2.647, pursuer_3/loss=2.450, pursuer_4/loss=2.503, pursuer_5/loss=2.523, pursuer_6/loss=2.641, pursuer_7/loss=2.485, rew=45.36]                             


Epoch #137: test_reward: -5.062188 ± 18.470377, best_reward: 23.594750 ± 23.791390 in #115


Epoch #138: 10001it [02:25, 68.94it/s, env_step=1380000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.707, pursuer_1/loss=2.727, pursuer_2/loss=2.549, pursuer_3/loss=2.414, pursuer_4/loss=2.567, pursuer_5/loss=2.486, pursuer_6/loss=2.729, pursuer_7/loss=2.517, rew=26.26]                             


Epoch #138: test_reward: -2.509750 ± 19.260688, best_reward: 23.594750 ± 23.791390 in #115


Epoch #139: 10001it [02:24, 69.36it/s, env_step=1390000, len=451, n/ep=0, n/st=50, pursuer_0/loss=2.536, pursuer_1/loss=2.616, pursuer_2/loss=2.667, pursuer_3/loss=2.657, pursuer_4/loss=2.408, pursuer_5/loss=2.434, pursuer_6/loss=2.541, pursuer_7/loss=2.595, rew=26.26]                             


Epoch #139: test_reward: 0.505187 ± 29.078331, best_reward: 23.594750 ± 23.791390 in #115


Epoch #140: 10001it [02:24, 69.04it/s, env_step=1400000, len=444, n/ep=0, n/st=50, pursuer_0/loss=2.647, pursuer_1/loss=2.846, pursuer_2/loss=2.766, pursuer_3/loss=2.922, pursuer_4/loss=2.834, pursuer_5/loss=2.771, pursuer_6/loss=2.604, pursuer_7/loss=2.484, rew=42.48]                             


Epoch #140: test_reward: -1.912438 ± 14.200034, best_reward: 23.594750 ± 23.791390 in #115


Epoch #141: 10001it [02:25, 68.76it/s, env_step=1410000, len=453, n/ep=1, n/st=50, pursuer_0/loss=2.878, pursuer_1/loss=3.042, pursuer_2/loss=2.629, pursuer_3/loss=2.748, pursuer_4/loss=2.516, pursuer_5/loss=2.569, pursuer_6/loss=2.672, pursuer_7/loss=2.677, rew=45.83]                             


Epoch #141: test_reward: 9.459750 ± 21.591854, best_reward: 23.594750 ± 23.791390 in #115


Epoch #142: 10001it [02:25, 68.85it/s, env_step=1420000, len=480, n/ep=0, n/st=50, pursuer_0/loss=2.616, pursuer_1/loss=2.361, pursuer_2/loss=2.670, pursuer_3/loss=2.685, pursuer_4/loss=2.533, pursuer_5/loss=2.716, pursuer_6/loss=2.509, pursuer_7/loss=2.825, rew=64.22]                             


Epoch #142: test_reward: 17.453312 ± 15.560270, best_reward: 23.594750 ± 23.791390 in #115


Epoch #143: 10001it [02:24, 69.05it/s, env_step=1430000, len=402, n/ep=0, n/st=50, pursuer_0/loss=2.523, pursuer_1/loss=2.657, pursuer_2/loss=2.575, pursuer_3/loss=2.559, pursuer_4/loss=2.733, pursuer_5/loss=2.659, pursuer_6/loss=2.603, pursuer_7/loss=2.617, rew=86.29]                             


Epoch #143: test_reward: 10.300875 ± 15.922851, best_reward: 23.594750 ± 23.791390 in #115


Epoch #144: 10001it [02:25, 68.75it/s, env_step=1440000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.626, pursuer_1/loss=2.548, pursuer_2/loss=2.595, pursuer_3/loss=2.701, pursuer_4/loss=2.804, pursuer_5/loss=2.714, pursuer_6/loss=2.647, pursuer_7/loss=2.668, rew=37.15]                             


Steps Policy Saved  1940
Epoch #144: test_reward: 6.818750 ± 18.563301, best_reward: 23.594750 ± 23.791390 in #115


Epoch #145: 10001it [02:27, 67.66it/s, env_step=1450000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.883, pursuer_1/loss=2.562, pursuer_2/loss=2.538, pursuer_3/loss=2.638, pursuer_4/loss=2.415, pursuer_5/loss=2.620, pursuer_6/loss=2.483, pursuer_7/loss=2.611, rew=76.83]                             


Epoch #145: test_reward: -7.833188 ± 17.021703, best_reward: 23.594750 ± 23.791390 in #115


Epoch #146: 10001it [02:26, 68.27it/s, env_step=1460000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.564, pursuer_1/loss=2.777, pursuer_2/loss=2.707, pursuer_3/loss=2.779, pursuer_4/loss=2.839, pursuer_5/loss=2.753, pursuer_6/loss=2.524, pursuer_7/loss=2.809, rew=11.62]                             


Epoch #146: test_reward: 2.239812 ± 21.377454, best_reward: 23.594750 ± 23.791390 in #115


Epoch #147: 10001it [02:19, 71.90it/s, env_step=1470000, len=430, n/ep=0, n/st=50, pursuer_0/loss=2.861, pursuer_1/loss=2.609, pursuer_2/loss=2.640, pursuer_3/loss=2.757, pursuer_4/loss=2.762, pursuer_5/loss=2.675, pursuer_6/loss=2.576, pursuer_7/loss=2.855, rew=48.06]                             


Epoch #147: test_reward: 10.722125 ± 23.382204, best_reward: 23.594750 ± 23.791390 in #115


Epoch #148: 10001it [02:13, 75.13it/s, env_step=1480000, len=355, n/ep=0, n/st=50, pursuer_0/loss=2.834, pursuer_1/loss=2.928, pursuer_2/loss=2.814, pursuer_3/loss=2.874, pursuer_4/loss=2.665, pursuer_5/loss=2.809, pursuer_6/loss=2.699, pursuer_7/loss=2.795, rew=70.52]                             


Epoch #148: test_reward: 11.288000 ± 20.926388, best_reward: 23.594750 ± 23.791390 in #115


Epoch #149: 10001it [02:22, 70.09it/s, env_step=1490000, len=388, n/ep=1, n/st=50, pursuer_0/loss=2.878, pursuer_1/loss=2.826, pursuer_2/loss=2.685, pursuer_3/loss=3.174, pursuer_4/loss=2.895, pursuer_5/loss=3.037, pursuer_6/loss=2.737, pursuer_7/loss=3.169, rew=72.69]                             


Epoch #149: test_reward: 9.614625 ± 19.393044, best_reward: 23.594750 ± 23.791390 in #115


Epoch #150: 10001it [02:26, 68.27it/s, env_step=1500000, len=402, n/ep=0, n/st=50, pursuer_0/loss=2.979, pursuer_1/loss=2.919, pursuer_2/loss=2.785, pursuer_3/loss=2.912, pursuer_4/loss=2.758, pursuer_5/loss=2.852, pursuer_6/loss=2.903, pursuer_7/loss=2.821, rew=41.28]                             


Epoch #150: test_reward: 14.170625 ± 17.713049, best_reward: 23.594750 ± 23.791390 in #115


Epoch #151: 10001it [02:17, 72.83it/s, env_step=1510000, len=387, n/ep=0, n/st=50, pursuer_0/loss=2.644, pursuer_1/loss=2.687, pursuer_2/loss=2.808, pursuer_3/loss=2.830, pursuer_4/loss=2.816, pursuer_5/loss=2.799, pursuer_6/loss=2.621, pursuer_7/loss=2.825, rew=72.12]                             


Epoch #151: test_reward: 5.467562 ± 19.888018, best_reward: 23.594750 ± 23.791390 in #115


Epoch #152: 10001it [02:16, 73.18it/s, env_step=1520000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.906, pursuer_1/loss=2.899, pursuer_2/loss=2.975, pursuer_3/loss=2.555, pursuer_4/loss=2.651, pursuer_5/loss=2.938, pursuer_6/loss=2.653, pursuer_7/loss=2.735, rew=51.74]                             


Epoch #152: test_reward: 23.254437 ± 27.175431, best_reward: 23.594750 ± 23.791390 in #115


Epoch #153: 10001it [02:18, 72.47it/s, env_step=1530000, len=500, n/ep=1, n/st=50, pursuer_0/loss=2.917, pursuer_1/loss=2.716, pursuer_2/loss=2.771, pursuer_3/loss=2.978, pursuer_4/loss=2.868, pursuer_5/loss=2.906, pursuer_6/loss=2.581, pursuer_7/loss=2.954, rew=56.74]                             


Epoch #153: test_reward: 1.048750 ± 22.912420, best_reward: 23.594750 ± 23.791390 in #115


Epoch #154: 10001it [02:17, 72.50it/s, env_step=1540000, len=461, n/ep=0, n/st=50, pursuer_0/loss=2.910, pursuer_1/loss=2.842, pursuer_2/loss=2.632, pursuer_3/loss=2.769, pursuer_4/loss=2.675, pursuer_5/loss=2.967, pursuer_6/loss=2.723, pursuer_7/loss=2.765, rew=50.21]                             


Epoch #154: test_reward: -7.872438 ± 25.997432, best_reward: 23.594750 ± 23.791390 in #115


Epoch #155: 10001it [02:24, 69.35it/s, env_step=1550000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.963, pursuer_1/loss=2.941, pursuer_2/loss=2.601, pursuer_3/loss=2.711, pursuer_4/loss=2.518, pursuer_5/loss=2.952, pursuer_6/loss=2.694, pursuer_7/loss=2.754, rew=66.62]                             


Epoch #155: test_reward: 10.488500 ± 22.779093, best_reward: 23.594750 ± 23.791390 in #115


Epoch #156: 10001it [02:09, 77.28it/s, env_step=1560000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.762, pursuer_1/loss=2.807, pursuer_2/loss=2.935, pursuer_3/loss=2.819, pursuer_4/loss=3.072, pursuer_5/loss=3.134, pursuer_6/loss=2.791, pursuer_7/loss=2.753, rew=11.16]                             


Epoch #156: test_reward: 13.645312 ± 22.848978, best_reward: 23.594750 ± 23.791390 in #115


Epoch #157: 10001it [02:17, 72.56it/s, env_step=1570000, len=380, n/ep=0, n/st=50, pursuer_0/loss=2.864, pursuer_1/loss=2.919, pursuer_2/loss=2.875, pursuer_3/loss=2.972, pursuer_4/loss=2.880, pursuer_5/loss=2.758, pursuer_6/loss=2.799, pursuer_7/loss=2.931, rew=48.88]                             


Epoch #157: test_reward: 20.516000 ± 21.692316, best_reward: 23.594750 ± 23.791390 in #115


Epoch #158: 10001it [02:22, 70.26it/s, env_step=1580000, len=407, n/ep=1, n/st=50, pursuer_0/loss=2.968, pursuer_1/loss=3.112, pursuer_2/loss=2.831, pursuer_3/loss=2.820, pursuer_4/loss=2.835, pursuer_5/loss=2.849, pursuer_6/loss=2.848, pursuer_7/loss=3.414, rew=26.52]                             


Epoch #158: test_reward: 14.458937 ± 27.947749, best_reward: 23.594750 ± 23.791390 in #115


Epoch #159: 10001it [02:15, 73.61it/s, env_step=1590000, len=334, n/ep=0, n/st=50, pursuer_0/loss=2.841, pursuer_1/loss=2.789, pursuer_2/loss=3.001, pursuer_3/loss=2.799, pursuer_4/loss=2.963, pursuer_5/loss=2.797, pursuer_6/loss=2.911, pursuer_7/loss=3.086, rew=97.59]                             


Steps Policy Saved  2290
Epoch #159: test_reward: 15.767250 ± 23.569582, best_reward: 23.594750 ± 23.791390 in #115


Epoch #160: 10001it [02:10, 76.40it/s, env_step=1600000, len=500, n/ep=1, n/st=50, pursuer_0/loss=3.104, pursuer_1/loss=2.909, pursuer_2/loss=2.899, pursuer_3/loss=2.910, pursuer_4/loss=2.845, pursuer_5/loss=3.056, pursuer_6/loss=2.837, pursuer_7/loss=2.774, rew=56.43]                             


Epoch #160: test_reward: 6.711062 ± 21.012557, best_reward: 23.594750 ± 23.791390 in #115


Epoch #161: 10001it [02:12, 75.73it/s, env_step=1610000, len=475, n/ep=0, n/st=50, pursuer_0/loss=2.986, pursuer_1/loss=2.698, pursuer_2/loss=2.714, pursuer_3/loss=2.928, pursuer_4/loss=2.927, pursuer_5/loss=2.828, pursuer_6/loss=2.902, pursuer_7/loss=2.841, rew=28.69]                             


Epoch #161: test_reward: 21.881625 ± 16.834572, best_reward: 23.594750 ± 23.791390 in #115


Epoch #162: 10001it [02:15, 73.60it/s, env_step=1620000, len=474, n/ep=0, n/st=50, pursuer_0/loss=2.989, pursuer_1/loss=2.792, pursuer_2/loss=2.872, pursuer_3/loss=3.063, pursuer_4/loss=3.106, pursuer_5/loss=2.627, pursuer_6/loss=2.639, pursuer_7/loss=2.773, rew=84.30]                             


Epoch #162: test_reward: 8.932125 ± 17.693865, best_reward: 23.594750 ± 23.791390 in #115


Epoch #163: 10001it [02:18, 72.03it/s, env_step=1630000, len=500, n/ep=0, n/st=50, pursuer_0/loss=2.787, pursuer_1/loss=2.788, pursuer_2/loss=2.920, pursuer_3/loss=2.830, pursuer_4/loss=2.639, pursuer_5/loss=2.906, pursuer_6/loss=2.902, pursuer_7/loss=3.172, rew=31.69]                             


Epoch #163: test_reward: 12.093500 ± 22.481649, best_reward: 23.594750 ± 23.791390 in #115


Epoch #164: 10001it [02:18, 72.18it/s, env_step=1640000, len=500, n/ep=0, n/st=50, pursuer_0/loss=3.104, pursuer_1/loss=3.087, pursuer_2/loss=3.101, pursuer_3/loss=2.907, pursuer_4/loss=3.110, pursuer_5/loss=2.874, pursuer_6/loss=2.914, pursuer_7/loss=3.000, rew=41.15]                             


Epoch #164: test_reward: 11.971000 ± 17.440813, best_reward: 23.594750 ± 23.791390 in #115


Epoch #165: 10001it [02:26, 68.20it/s, env_step=1650000, len=319, n/ep=0, n/st=50, pursuer_0/loss=2.982, pursuer_1/loss=3.070, pursuer_2/loss=2.771, pursuer_3/loss=3.067, pursuer_4/loss=3.151, pursuer_5/loss=2.948, pursuer_6/loss=3.045, pursuer_7/loss=2.848, rew=69.33]                             


Epoch #165: test_reward: 7.792375 ± 23.262758, best_reward: 23.594750 ± 23.791390 in #115


Epoch #166: 10001it [02:24, 69.26it/s, env_step=1660000, len=370, n/ep=0, n/st=50, pursuer_0/loss=2.899, pursuer_1/loss=3.129, pursuer_2/loss=2.897, pursuer_3/loss=2.932, pursuer_4/loss=2.880, pursuer_5/loss=3.059, pursuer_6/loss=2.596, pursuer_7/loss=3.060, rew=79.13]                             


Steps Policy Saved  2450
Epoch #166: test_reward: 13.295875 ± 15.967777, best_reward: 23.594750 ± 23.791390 in #115


Epoch #167: 10001it [02:28, 67.13it/s, env_step=1670000, len=363, n/ep=0, n/st=50, pursuer_0/loss=2.991, pursuer_1/loss=2.986, pursuer_2/loss=2.871, pursuer_3/loss=3.023, pursuer_4/loss=3.197, pursuer_5/loss=3.215, pursuer_6/loss=3.010, pursuer_7/loss=3.038, rew=69.98]                             


Epoch #167: test_reward: 18.785875 ± 18.371017, best_reward: 23.594750 ± 23.791390 in #115


Epoch #168: 10001it [02:28, 67.35it/s, env_step=1680000, len=500, n/ep=0, n/st=50, pursuer_0/loss=3.006, pursuer_1/loss=3.133, pursuer_2/loss=3.104, pursuer_3/loss=3.070, pursuer_4/loss=3.170, pursuer_5/loss=2.974, pursuer_6/loss=3.121, pursuer_7/loss=3.060, rew=32.60]                             


Epoch #168: test_reward: 0.991937 ± 20.300071, best_reward: 23.594750 ± 23.791390 in #115


Epoch #169: 10001it [02:27, 67.88it/s, env_step=1690000, len=414, n/ep=0, n/st=50, pursuer_0/loss=3.199, pursuer_1/loss=3.117, pursuer_2/loss=2.879, pursuer_3/loss=3.053, pursuer_4/loss=3.102, pursuer_5/loss=3.088, pursuer_6/loss=2.975, pursuer_7/loss=3.354, rew=50.45]                             


Best Saved Rew 2524
Epoch #169: test_reward: 25.885625 ± 27.005294, best_reward: 25.885625 ± 27.005294 in #169


Epoch #170: 10001it [02:27, 67.87it/s, env_step=1700000, len=500, n/ep=0, n/st=50, pursuer_0/loss=3.118, pursuer_1/loss=2.942, pursuer_2/loss=3.194, pursuer_3/loss=2.718, pursuer_4/loss=3.002, pursuer_5/loss=2.941, pursuer_6/loss=3.132, pursuer_7/loss=3.040, rew=37.15]                             


Epoch #170: test_reward: 1.401687 ± 22.383154, best_reward: 25.885625 ± 27.005294 in #169


Epoch #171: 10001it [02:27, 67.77it/s, env_step=1710000, len=346, n/ep=0, n/st=50, pursuer_0/loss=3.216, pursuer_1/loss=2.849, pursuer_2/loss=2.905, pursuer_3/loss=3.006, pursuer_4/loss=2.981, pursuer_5/loss=3.191, pursuer_6/loss=3.067, pursuer_7/loss=2.889, rew=77.11]                             


Steps Policy Saved  2570
Epoch #171: test_reward: 2.961250 ± 20.580919, best_reward: 25.885625 ± 27.005294 in #169


Epoch #172: 10001it [02:25, 68.82it/s, env_step=1720000, len=485, n/ep=0, n/st=50, pursuer_0/loss=3.141, pursuer_1/loss=3.037, pursuer_2/loss=2.831, pursuer_3/loss=3.442, pursuer_4/loss=2.981, pursuer_5/loss=3.270, pursuer_6/loss=3.040, pursuer_7/loss=2.900, rew=3.09]                              


Epoch #172: test_reward: 12.814750 ± 18.298531, best_reward: 25.885625 ± 27.005294 in #169


Epoch #173: 10001it [02:27, 67.89it/s, env_step=1730000, len=412, n/ep=0, n/st=50, pursuer_0/loss=3.046, pursuer_1/loss=2.895, pursuer_2/loss=3.148, pursuer_3/loss=3.320, pursuer_4/loss=3.204, pursuer_5/loss=3.213, pursuer_6/loss=2.896, pursuer_7/loss=3.311, rew=55.81]                             


Epoch #173: test_reward: 16.483062 ± 24.201524, best_reward: 25.885625 ± 27.005294 in #169


Epoch #174: 10001it [02:28, 67.54it/s, env_step=1740000, len=350, n/ep=0, n/st=50, pursuer_0/loss=3.197, pursuer_1/loss=3.069, pursuer_2/loss=3.122, pursuer_3/loss=3.129, pursuer_4/loss=2.820, pursuer_5/loss=3.172, pursuer_6/loss=2.958, pursuer_7/loss=3.134, rew=36.53]                             


Epoch #174: test_reward: -6.360125 ± 18.334506, best_reward: 25.885625 ± 27.005294 in #169


Epoch #175: 10001it [02:24, 69.06it/s, env_step=1750000, len=401, n/ep=0, n/st=50, pursuer_0/loss=3.135, pursuer_1/loss=2.825, pursuer_2/loss=3.267, pursuer_3/loss=3.184, pursuer_4/loss=3.110, pursuer_5/loss=3.618, pursuer_6/loss=3.295, pursuer_7/loss=3.168, rew=76.37]                             


Epoch #175: test_reward: 7.847187 ± 20.356036, best_reward: 25.885625 ± 27.005294 in #169


Epoch #176: 10001it [02:27, 67.66it/s, env_step=1760000, len=333, n/ep=0, n/st=50, pursuer_0/loss=3.449, pursuer_1/loss=3.170, pursuer_2/loss=3.178, pursuer_3/loss=3.308, pursuer_4/loss=3.144, pursuer_5/loss=3.256, pursuer_6/loss=3.119, pursuer_7/loss=3.084, rew=37.79]                             


Epoch #176: test_reward: -0.280625 ± 16.469588, best_reward: 25.885625 ± 27.005294 in #169


Epoch #177: 10001it [02:11, 75.89it/s, env_step=1770000, len=404, n/ep=0, n/st=50, pursuer_0/loss=3.252, pursuer_1/loss=3.075, pursuer_2/loss=3.110, pursuer_3/loss=3.303, pursuer_4/loss=3.195, pursuer_5/loss=3.219, pursuer_6/loss=3.180, pursuer_7/loss=3.008, rew=91.01]                             


Epoch #177: test_reward: 9.963562 ± 23.512793, best_reward: 25.885625 ± 27.005294 in #169


Epoch #178: 10001it [02:13, 74.76it/s, env_step=1780000, len=481, n/ep=0, n/st=50, pursuer_0/loss=2.969, pursuer_1/loss=3.080, pursuer_2/loss=2.910, pursuer_3/loss=2.969, pursuer_4/loss=2.877, pursuer_5/loss=3.299, pursuer_6/loss=3.071, pursuer_7/loss=2.725, rew=-6.41]                             


Epoch #178: test_reward: -3.171500 ± 17.411879, best_reward: 25.885625 ± 27.005294 in #169


Epoch #179: 10001it [02:16, 73.46it/s, env_step=1790000, len=420, n/ep=0, n/st=50, pursuer_0/loss=3.020, pursuer_1/loss=2.936, pursuer_2/loss=3.154, pursuer_3/loss=3.122, pursuer_4/loss=3.050, pursuer_5/loss=3.058, pursuer_6/loss=3.034, pursuer_7/loss=3.110, rew=94.15]                             


ValueError: operands could not be broadcast together with shapes (10,) (9,) (10,) 