In [1]:
import os
import datetime
from typing import Optional, Tuple
import json


os.environ["WANDB_NOTEBOOK_NAME"] = "Tianshow_Centralized_Training"

import numpy as np
import torch
from tianshou.data import Collector, VectorReplayBuffer, PrioritizedVectorReplayBuffer
from tianshou.env import DummyVectorEnv
from tianshou.env.pettingzoo_env import PettingZooEnv
from tianshou.policy import BasePolicy, DQNPolicy, MultiAgentPolicyManager, RandomPolicy, RainbowPolicy
from tianshou.trainer import OffpolicyTrainer
from torch.utils.tensorboard import SummaryWriter

from pettingzoo.sisl import pursuit_v4

from TaskAllocation.RL_Policies.MultiHead_SISL import MultiHead_SISL
from TaskAllocation.RL_Policies.DNN_SISL import DNN_SISL
from TaskAllocation.RL_Policies.CNN_SISL import CNN_SISL
from TaskAllocation.RL_Policies.CNN_ATT_SISL import CNN_ATT_SISL
from TaskAllocation.RL_Policies.SISL_Task_MultiHead import SISL_Task_MultiHead


from Mods.MemoryBuffer import StateMemoryVectorReplayBuffer
from Mods.MemoryBuffer import MemoryOffpolicyTrainer
import Mods.MemPursuitEnv as MemPursuitEnv
from Mods.OffPolicyTrainerMod import OffPolicyTrainerMod

import Mods.TaskPursuitEnv as TaskPursuitEnv

from TaskAllocation.RL_Policies.Custom_Classes import CustomNet
from TaskAllocation.RL_Policies.Custom_Classes import CustomCollector
from TaskAllocation.RL_Policies.Custom_Classes import CustomParallelToAECWrapper

# Add specific modification to tianshou
import wandb
from tianshou.utils import WandbLogger
from tianshou.utils.logger.base import LOG_DATA_TYPE

def new_write(self, step_type: str, step: int, data: LOG_DATA_TYPE) -> None:
    """Send one batch of metrics to Weights & Biases.

    Installed below as a replacement for ``WandbLogger.write``.

    Args:
        step_type: Name of the step counter; it is logged as an extra key
            next to the metrics.
        step: Current value of that counter.
        data: Mapping of metric names to values to log.
    """
    # Log a merged copy so the caller's dict is not modified as a side effect.
    wandb.log({**data, step_type: step})


WandbLogger.write = new_write

#from tianshou_DQN import train
# ---------------------------------------------------------------------------
# Experiment selection
# ---------------------------------------------------------------------------
# Network architecture name; alternatives handled by _get_agents are
# "SISL_Task_MultiHead", "CNN_ATT_SISL", "MultiHead_SISL" and "DNN_SISL".
model = "CNN_SISL"
test_num = "_Desk_CNN_FV5"
policyModel = "DQN"

train_env_num = 10
test_env_num = 30

name = model + test_num

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
log_name = name + str(now)
log_path = os.path.join('./', "Logs", "dqn_sisl", log_name)

# ---------------------------------------------------------------------------
# Policy checkpoints
# ---------------------------------------------------------------------------
load_policy_name = 'policy_SISL_Task_MultiHead_Desk_NewExpCor231219-173711_44.pth'
save_policy_name = f'policy_{log_name}'
policy_path = "dqn_SISL"

Policy_Config = {
    "same_policy": True,   # share one policy object between all agents
    "load_model": False,   # load weights from model_load_path before training
    "freeze_CNN": False,   # freeze the convolutional layers of the network
}

SISL_Config = {
    "max_cycles": 500,         # default: 500
    "x_size": 16,              # default: 16
    "y_size": 16,              # default: 16
    "shared_reward": False,    # default: True
    "n_evaders": 30,           # default: 30
    "n_pursuers": 8,           # default: 10
    "obs_range": 9,            # default: 7
    "n_catch": 2,              # default: 2
    "freeze_evaders": False,   # default: False
    "tag_reward": 0.01,        # default: 0.01
    "catch_reward": 5.0,       # default: 5.0
    "urgency_reward": -0.1,    # default: -0.1
    "surround": True,          # default: True
    "constraint_window": 1.0,  # default: 1.0
    ###---- Additional Config ----###
    # "att_memory" : False,
    # "max_tasks" : 10
}

max_cycles = SISL_Config["max_cycles"]
n_agents = SISL_Config["n_pursuers"]

# NOTE: the "optminizer" key is misspelled; it is kept verbatim because it is
# part of the logged run configuration. Nothing in this file reads it.
dqn_params = {
    "discount_factor": 0.98,
    "estimation_step": 20,
    "target_update_freq": 1000,  # alternative: max_cycles * n_agents
    "optminizer": "Adam",
    "lr": 0.00016,
}

trainer_params = {
    "max_epoch": 500,
    "step_per_epoch": 20000,   # alternative: 5 * (150 * n_agents)
    "step_per_collect": 400,   # alternative: * (10 * n_agents)
    "episode_per_test": 30,
    "batch_size": 32 * n_agents,
    # Updates only run after a collect finishes (as many as needed to reach
    # this ratio).
    "update_per_step": 1 / 50,
    "tn_eps_max": 0.10,
    "ts_eps_max": 0.01,
    "warmup_size": 1,
}

# Flattened configuration reported to the logger. It is built as a fresh dict
# so that dqn_params is not polluted with the trainer/environment settings.
runConfig = {**dqn_params, **Policy_Config, **trainer_params, **SISL_Config}

model_load_path = os.path.join(policy_path, load_policy_name)
model_save_path = os.path.join(policy_path, save_policy_name)
os.makedirs(policy_path, exist_ok=True)
os.makedirs(log_path, exist_ok=True)

def _build_net(observation_space, device):
    """Instantiate the network selected by the module-level ``model`` name and move it to ``device``."""
    if model == "MultiHead_SISL":
        net = MultiHead_SISL(
            obs_shape=observation_space,
            num_tasks=5,
            hidden_sizes=32,
            device=device,
        )
    elif model == "SISL_Task_MultiHead":
        net = SISL_Task_MultiHead(
            num_tasks=20,
            num_features_per_task=9,
            device=device,
        )
    elif model == "DNN_SISL":
        net = DNN_SISL(
            obs_shape=observation_space,
            action_shape=5,
            device=device,
        )
    elif model == "CNN_SISL":
        net = CNN_SISL(
            obs_shape=observation_space.shape,
            action_shape=5,
            device=device,
        )
    elif model == "CNN_ATT_SISL":
        net = CNN_ATT_SISL(
            obs_shape=observation_space.shape,
            action_shape=5,
            device=device,
        )
    else:
        # Fail here with a clear message instead of an unbound `net` later on.
        raise ValueError(f"Unknown model name: {model!r}")
    return net.to(device)


def _get_agents(
    agent_learn: Optional[BasePolicy] = None,
    agent_opponent: Optional[BasePolicy] = None,
    optim: Optional[torch.optim.Optimizer] = None,
    policy_load_path = None,
) -> Tuple[BasePolicy, torch.optim.Optimizer, list]:
    """Build the multi-agent policy used for training.

    Args:
        agent_learn: Only used when ``policyModel`` is not ``"DQN"``; in that
            case this policy is assigned to the agents. Otherwise a fresh
            ``DQNPolicy`` is created and this argument is ignored.
        agent_opponent: Unused; kept for interface compatibility.
        optim: Ignored; a new Adam optimizer is always created per network.
        policy_load_path: Optional checkpoint path that overrides the
            module-level ``model_load_path`` when
            ``Policy_Config["load_model"]`` is enabled.

    Returns:
        A tuple ``(policy, optim, agents)``: the ``MultiAgentPolicyManager``,
        the optimizer of the last network built, and ``env.agents``.

    Raises:
        ValueError: If ``model`` names an unknown network, or if
            ``policyModel`` is not ``"DQN"`` and no ``agent_learn`` was given.
    """
    env = _get_env()
    observation_space = env.observation_space
    action_space = env.action_space
    n_env_agents = len(env.agents)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    if Policy_Config["same_policy"]:
        policies_number = 1
    else:
        # Up to 4 distinct policies, never more than there are agents.
        policies_number = min(4, n_env_agents)

    load_path = policy_load_path if policy_load_path is not None else model_load_path

    distinct_policies = []
    for _ in range(policies_number):
        net = _build_net(observation_space, device)

        if Policy_Config["freeze_CNN"]:
            net.freeze_conv_layers()  # Freeze the convolutional layers
            # Only the policy/value heads are handed to the optimizer.
            optim = torch.optim.Adam(
                list(net.policy_fn.parameters()) + list(net.value_fn.parameters()),
                lr=dqn_params["lr"],
            )
        else:
            optim = torch.optim.Adam(
                net.parameters(),
                lr=dqn_params["lr"],
                weight_decay=0.0,
                amsgrad=True,
            )

        if policyModel == "DQN":
            agent_learn = DQNPolicy(
                model=net,
                optim=optim,
                action_space=action_space,
                discount_factor=dqn_params["discount_factor"],
                estimation_step=dqn_params["estimation_step"],
                target_update_freq=dqn_params["target_update_freq"],
                reward_normalization=False,
                clip_loss_grad=False,
            )
        elif agent_learn is None:
            raise ValueError(
                f"Unsupported policyModel {policyModel!r} and no agent_learn policy was provided"
            )

        if Policy_Config["load_model"]:
            # map_location lets a checkpoint saved on GPU be restored on a
            # CPU-only machine (and vice versa).
            agent_learn.load_state_dict(torch.load(load_path, map_location=device))
            print(f'Loaded-> {load_path}')

        distinct_policies.append(agent_learn)

    if Policy_Config["same_policy"]:
        # Every agent references the very same policy object.
        agents = [distinct_policies[0]] * n_env_agents
    else:
        # Agents beyond the distinct policies reuse the first policy.
        extra = n_env_agents - len(distinct_policies)
        agents = distinct_policies + [distinct_policies[0]] * extra

    policy = MultiAgentPolicyManager(policies=agents, env=env)

    return policy, optim, env.agents

def _get_env(test=False):
    """This function is needed to provide callables for DummyVectorEnv.

    Creates a ``pursuit_v4`` environment configured from ``SISL_Config`` and
    wraps it in ``PettingZooEnv``.

    Args:
        test: Accepted for interface compatibility. Training and test
            environments are currently built with identical settings, so the
            flag does not change the result.

    Returns:
        The ``PettingZooEnv``-wrapped environment.
    """
    # Only these keys are forwarded, so extra entries added to SISL_Config
    # (e.g. "att_memory", "max_tasks") are not passed to pursuit_v4.
    pursuit_keys = (
        "max_cycles",
        "x_size",
        "y_size",
        "shared_reward",
        "n_evaders",
        "n_pursuers",
        "obs_range",
        "n_catch",
        "freeze_evaders",
        "tag_reward",
        "catch_reward",
        "urgency_reward",
        "surround",
        "constraint_window",
    )
    env_kwargs = {key: SISL_Config[key] for key in pursuit_keys}

    # render_mode=None disables rendering; use "human" to watch episodes.
    env = pursuit_v4.env(render_mode=None, **env_kwargs)

    return PettingZooEnv(env)

# print(json.dumps(runConfig, indent=4))


In [2]:
if __name__ == "__main__":

    torch.set_grad_enabled(True)

    # ======== Step 1: Environment setup =========
    train_envs = DummyVectorEnv([_get_env for _ in range(train_env_num)])
    test_envs = DummyVectorEnv([lambda: _get_env(test=True) for _ in range(test_env_num)])

    # seed
    seed = 100
    np.random.seed(seed)
    torch.manual_seed(seed)
    train_envs.seed(seed)
    test_envs.seed(seed)

    # ======== Step 2: Agent setup =========
    policy, optim, agents = _get_agents()

    # ======== Step 3: Collector setup =========
    # Alternatives tried previously: VectorReplayBuffer(300_000, len(train_envs))
    # and StateMemoryVectorReplayBuffer(300_000, len(train_envs), memory_size=10).
    train_collector = Collector(
        policy,
        train_envs,
        PrioritizedVectorReplayBuffer(300_000, len(train_envs), alpha=0.6, beta=0.4),
        exploration_noise=True,
    )
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    print("Buffer Warming Up ")
    # Warm-up collection is currently disabled:
    # for i in range(trainer_params["warmup_size"]):
    #     train_collector.collect(n_episode=train_env_num)
    #     print(".", end="")

    # Buffer size expressed in units of max_cycles * n_pursuers transitions.
    len_buffer = len(train_collector.buffer) / (SISL_Config["max_cycles"] * SISL_Config["n_pursuers"])
    print("\nBuffer Lenght: ", len_buffer)

    info = {"Buffer": "PriorizedReplayBuffer", " Warmup_ep": len_buffer}

    # ======== tensorboard logging setup =========
    logger = WandbLogger(
        train_interval=runConfig["max_cycles"] * runConfig["n_pursuers"],
        test_interval=1,
        update_interval=runConfig["max_cycles"],
        save_interval=1,
        write_flush=True,
        project="SISL_Eval01",
        name=log_name,
        entity=None,
        run_id=log_name,
        config=runConfig,
        monitor_gym=True,
    )

    writer = SummaryWriter(log_path)
    writer.add_text("args", str(runConfig))
    logger.load(writer)

    # One-element list so the nested callbacks can mutate the counter.
    # It is incremented on every reward_metric call and is used to tag
    # checkpoint file names.
    global_step_holder = [0]

    # ======== Step 4: Callback functions setup =========
    def _save_policies(policy, shared_suffix, agent_suffix=""):
        """Save the shared policy, or one file per agent, and return the step tag used."""
        step = str(global_step_holder[0])
        prefix = model_save_path + "_" + step + "_"
        if Policy_Config["same_policy"]:
            torch.save(policy.policies[agents[0]].state_dict(), prefix + shared_suffix + ".pth")
        else:
            for agent in agents:
                torch.save(policy.policies[agent].state_dict(), prefix + agent + agent_suffix + ".pth")
        return step

    def _set_eps(epsilon):
        """Apply the exploration rate to the shared policy or to every agent's policy."""
        targets = agents[:1] if Policy_Config["same_policy"] else agents
        for agent in targets:
            policy.policies[agent].set_eps(epsilon)

    def save_best_fn(policy):
        """Checkpoint callback passed to the trainer as ``save_best_fn``."""
        step = _save_policies(policy, "BestRew")
        if Policy_Config["same_policy"]:
            print("Best Saved Rew", step)
        else:
            print("Bests Saved Rew", step)

    def save_test_best_fn(policy):
        """Checkpoint callback tagged ``BestLen`` (currently not passed to the trainer)."""
        step = _save_policies(policy, "BestLen")
        print("Best Saved Length", step)

    def stop_fn(mean_rewards):
        """Stop training once the mean reward reaches the threshold."""
        return mean_rewards >= 99999939.0

    def train_fn(epoch, env_step):
        """Set the training epsilon, decayed linearly with the epoch index.

        The value starts at ``tn_eps_max`` for epoch 0 and reaches
        ``tn_eps_max / 100`` when ``epoch == max_epoch``.
        """
        eps_max = trainer_params['tn_eps_max']
        progress = epoch / trainer_params['max_epoch']
        epsilon = eps_max - (eps_max - eps_max / 100) * progress
        _set_eps(epsilon)

    def test_fn(epoch, env_step):
        """Set the evaluation epsilon and periodically checkpoint the policies."""
        _set_eps(trainer_params['ts_eps_max'])

        # NOTE: the period is measured in reward_metric calls (see
        # global_step_holder), not in epochs or environment steps.
        if global_step_holder[0] % 10 == 0:
            step = _save_policies(policy, "Step", agent_suffix="Step" + str(global_step_holder[0]))
            print("Steps Policy Saved ", step)

    def reward_metric(rews):
        """Reduce rewards by summing over axis 1 and bump the global counter."""
        global_step_holder[0] += 1
        return np.sum(rews, axis=1)

    # ======== Step 5: Run the trainer =========
    offPolicyTrainer = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=trainer_params['max_epoch'],
        step_per_epoch=trainer_params['step_per_epoch'],
        step_per_collect=trainer_params['step_per_collect'],
        episode_per_test=trainer_params['episode_per_test'],
        batch_size=trainer_params['batch_size'],
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        # save_test_best_fn=save_test_best_fn,
        update_per_step=trainer_params['update_per_step'],
        logger=logger,
        test_in_train=True,
        reward_metric=reward_metric,
        show_progress=True,
    )

    try:
        result = offPolicyTrainer.run()
    finally:
        # Close the writer even when training is interrupted or fails.
        writer.close()

    print(f"\n==========Result==========\n{result}")
    print("\n(the trained policy can be accessed via policy.policies[agents[0]])")



Buffer Warming Up 

Buffer Lenght:  0.0


[34m[1mwandb[0m: Currently logged in as: [33mandrekuros[0m. Use [1m`wandb login --relogin`[0m to force relogin
  from IPython.core.display import HTML, display  # type: ignore


Steps Policy Saved  0
Best Saved Rew 1


Epoch #1: 20001it [01:15, 263.45it/s, env_step=20000, len=0, n/ep=0, n/st=400, pursuer_0/loss=0.062, pursuer_1/loss=0.073, pursuer_2/loss=0.089, pursuer_3/loss=0.123, pursuer_4/loss=0.251, pursuer_5/loss=0.165, pursuer_6/loss=0.094, pursuer_7/loss=0.214, rew=0.00]                           


Best Saved Rew 2
Epoch #1: test_reward: -362.189500 ± 15.988267, best_reward: -362.189500 ± 15.988267 in #1


Epoch #2: 20001it [01:11, 278.79it/s, env_step=40000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.211, pursuer_1/loss=0.229, pursuer_2/loss=0.158, pursuer_3/loss=0.241, pursuer_4/loss=0.343, pursuer_5/loss=0.388, pursuer_6/loss=0.285, pursuer_7/loss=0.365, rew=-224.32]                           


Best Saved Rew 4
Epoch #2: test_reward: -142.375375 ± 54.930285, best_reward: -142.375375 ± 54.930285 in #2


Epoch #3: 20001it [01:13, 271.56it/s, env_step=60000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.301, pursuer_1/loss=0.323, pursuer_2/loss=0.350, pursuer_3/loss=0.343, pursuer_4/loss=0.493, pursuer_5/loss=0.449, pursuer_6/loss=0.479, pursuer_7/loss=0.427, rew=-224.32]                           


Epoch #3: test_reward: -178.905458 ± 57.783014, best_reward: -142.375375 ± 54.930285 in #2


Epoch #4: 20001it [01:09, 286.38it/s, env_step=80000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.318, pursuer_1/loss=0.367, pursuer_2/loss=0.286, pursuer_3/loss=0.356, pursuer_4/loss=0.435, pursuer_5/loss=0.460, pursuer_6/loss=0.496, pursuer_7/loss=0.424, rew=-69.08]                           


Best Saved Rew 7
Epoch #4: test_reward: -32.242625 ± 67.873441, best_reward: -32.242625 ± 67.873441 in #4


Epoch #5: 20001it [01:14, 269.56it/s, env_step=100000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.357, pursuer_1/loss=0.371, pursuer_2/loss=0.267, pursuer_3/loss=0.385, pursuer_4/loss=0.396, pursuer_5/loss=0.547, pursuer_6/loss=0.464, pursuer_7/loss=0.439, rew=-69.08]                           


Epoch #5: test_reward: -64.970458 ± 64.733373, best_reward: -32.242625 ± 67.873441 in #4


Epoch #6: 20001it [01:10, 283.65it/s, env_step=120000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.432, pursuer_1/loss=0.471, pursuer_2/loss=0.352, pursuer_3/loss=0.355, pursuer_4/loss=0.544, pursuer_5/loss=0.598, pursuer_6/loss=0.597, pursuer_7/loss=0.562, rew=-38.73]                           


Epoch #6: test_reward: -82.717958 ± 62.940956, best_reward: -32.242625 ± 67.873441 in #4


Epoch #7: 20001it [01:11, 279.41it/s, env_step=140000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.443, pursuer_1/loss=0.423, pursuer_2/loss=0.499, pursuer_3/loss=0.439, pursuer_4/loss=0.542, pursuer_5/loss=0.703, pursuer_6/loss=0.721, pursuer_7/loss=0.574, rew=-38.73]                           


Steps Policy Saved  10
Epoch #7: test_reward: -124.357042 ± 58.509107, best_reward: -32.242625 ± 67.873441 in #4


Epoch #8: 20001it [01:07, 294.88it/s, env_step=160000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.509, pursuer_1/loss=0.388, pursuer_2/loss=0.429, pursuer_3/loss=0.424, pursuer_4/loss=0.631, pursuer_5/loss=0.601, pursuer_6/loss=0.553, pursuer_7/loss=0.556, rew=-13.77]                           


Epoch #8: test_reward: -140.385542 ± 64.295437, best_reward: -32.242625 ± 67.873441 in #4


Epoch #9: 20001it [01:14, 268.49it/s, env_step=180000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.495, pursuer_1/loss=0.396, pursuer_2/loss=0.453, pursuer_3/loss=0.455, pursuer_4/loss=0.623, pursuer_5/loss=0.597, pursuer_6/loss=0.565, pursuer_7/loss=0.558, rew=-13.77]                           


Epoch #9: test_reward: -195.519333 ± 72.022377, best_reward: -32.242625 ± 67.873441 in #4


Epoch #10: 20001it [01:07, 297.10it/s, env_step=200000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.391, pursuer_1/loss=0.494, pursuer_2/loss=0.503, pursuer_3/loss=0.494, pursuer_4/loss=0.590, pursuer_5/loss=0.716, pursuer_6/loss=0.624, pursuer_7/loss=0.530, rew=-1.28]                           


Epoch #10: test_reward: -100.674792 ± 62.964899, best_reward: -32.242625 ± 67.873441 in #4


Epoch #11: 20001it [01:12, 275.56it/s, env_step=220000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.516, pursuer_1/loss=0.494, pursuer_2/loss=0.522, pursuer_3/loss=0.490, pursuer_4/loss=0.769, pursuer_5/loss=0.651, pursuer_6/loss=0.756, pursuer_7/loss=0.597, rew=-1.28]                           


Epoch #11: test_reward: -77.089167 ± 45.471918, best_reward: -32.242625 ± 67.873441 in #4


Epoch #12: 20001it [01:07, 294.38it/s, env_step=240000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.530, pursuer_1/loss=0.459, pursuer_2/loss=0.547, pursuer_3/loss=0.502, pursuer_4/loss=0.650, pursuer_5/loss=0.731, pursuer_6/loss=0.660, pursuer_7/loss=0.587, rew=-13.30]                           


Best Saved Rew 19
Epoch #12: test_reward: -24.847500 ± 60.274563, best_reward: -24.847500 ± 60.274563 in #12


Epoch #13: 20001it [01:12, 275.99it/s, env_step=260000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.460, pursuer_1/loss=0.533, pursuer_2/loss=0.570, pursuer_3/loss=0.476, pursuer_4/loss=0.627, pursuer_5/loss=0.630, pursuer_6/loss=0.598, pursuer_7/loss=0.633, rew=-13.30]                           


Best Saved Rew 20
Epoch #13: test_reward: -17.451708 ± 50.735269, best_reward: -17.451708 ± 50.735269 in #13


Epoch #14: 20001it [01:07, 297.31it/s, env_step=280000, len=4000, n/ep=9, n/st=400, pursuer_0/loss=0.507, pursuer_1/loss=0.559, pursuer_2/loss=0.513, pursuer_3/loss=0.513, pursuer_4/loss=0.668, pursuer_5/loss=0.672, pursuer_6/loss=0.603, pursuer_7/loss=0.585, rew=1.32]                             


Best Saved Rew 23
Epoch #14: test_reward: 14.367583 ± 49.965794, best_reward: 14.367583 ± 49.965794 in #14


Epoch #15: 20001it [01:06, 300.72it/s, env_step=300000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.503, pursuer_1/loss=0.475, pursuer_2/loss=0.482, pursuer_3/loss=0.445, pursuer_4/loss=0.669, pursuer_5/loss=0.704, pursuer_6/loss=0.675, pursuer_7/loss=0.621, rew=1.32]                           


Epoch #15: test_reward: -5.923375 ± 83.483771, best_reward: 14.367583 ± 49.965794 in #14


Epoch #16: 20001it [01:01, 327.38it/s, env_step=320000, len=4000, n/ep=8, n/st=400, pursuer_0/loss=0.621, pursuer_1/loss=0.479, pursuer_2/loss=0.573, pursuer_3/loss=0.499, pursuer_4/loss=0.602, pursuer_5/loss=0.722, pursuer_6/loss=0.746, pursuer_7/loss=0.581, rew=13.41]                            


Epoch #16: test_reward: -43.202167 ± 62.522121, best_reward: 14.367583 ± 49.965794 in #14


Epoch #17: 20001it [01:06, 302.78it/s, env_step=340000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.610, pursuer_1/loss=0.592, pursuer_2/loss=0.624, pursuer_3/loss=0.633, pursuer_4/loss=0.631, pursuer_5/loss=0.714, pursuer_6/loss=0.685, pursuer_7/loss=0.799, rew=13.41]                           


Epoch #17: test_reward: -13.647083 ± 60.732727, best_reward: 14.367583 ± 49.965794 in #14


Epoch #18: 20001it [01:03, 313.70it/s, env_step=360000, len=4000, n/ep=8, n/st=400, pursuer_0/loss=0.603, pursuer_1/loss=0.638, pursuer_2/loss=0.656, pursuer_3/loss=0.599, pursuer_4/loss=0.711, pursuer_5/loss=0.752, pursuer_6/loss=0.754, pursuer_7/loss=0.718, rew=12.59]                            


Epoch #18: test_reward: -21.458958 ± 41.529736, best_reward: 14.367583 ± 49.965794 in #14


Epoch #19: 20001it [01:08, 292.73it/s, env_step=380000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.583, pursuer_1/loss=0.649, pursuer_2/loss=0.558, pursuer_3/loss=0.551, pursuer_4/loss=0.752, pursuer_5/loss=0.784, pursuer_6/loss=0.795, pursuer_7/loss=0.753, rew=12.59]                           


Epoch #19: test_reward: -2.131375 ± 72.129753, best_reward: 14.367583 ± 49.965794 in #14


Epoch #20: 20001it [01:01, 323.66it/s, env_step=400000, len=4000, n/ep=7, n/st=400, pursuer_0/loss=0.523, pursuer_1/loss=0.461, pursuer_2/loss=0.598, pursuer_3/loss=0.539, pursuer_4/loss=0.733, pursuer_5/loss=0.640, pursuer_6/loss=0.673, pursuer_7/loss=0.701, rew=67.12]                            


Epoch #20: test_reward: -17.552542 ± 66.894208, best_reward: 14.367583 ± 49.965794 in #14


Epoch #21: 20001it [01:06, 300.27it/s, env_step=420000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.594, pursuer_1/loss=0.595, pursuer_2/loss=0.653, pursuer_3/loss=0.607, pursuer_4/loss=0.796, pursuer_5/loss=0.794, pursuer_6/loss=0.775, pursuer_7/loss=0.767, rew=67.12]                           


Best Saved Rew 40
Epoch #21: test_reward: 36.511792 ± 53.878683, best_reward: 36.511792 ± 53.878683 in #21


Epoch #22: 20001it [01:02, 318.77it/s, env_step=440000, len=4000, n/ep=6, n/st=400, pursuer_0/loss=0.585, pursuer_1/loss=0.640, pursuer_2/loss=0.693, pursuer_3/loss=0.721, pursuer_4/loss=0.705, pursuer_5/loss=0.781, pursuer_6/loss=0.876, pursuer_7/loss=0.773, rew=53.95]                            


Best Saved Rew 46
Epoch #22: test_reward: 50.673875 ± 62.603588, best_reward: 50.673875 ± 62.603588 in #22


Epoch #23: 20001it [01:04, 309.13it/s, env_step=460000, len=2832, n/ep=0, n/st=400, pursuer_0/loss=0.648, pursuer_1/loss=0.622, pursuer_2/loss=0.691, pursuer_3/loss=0.613, pursuer_4/loss=0.760, pursuer_5/loss=0.737, pursuer_6/loss=0.736, pursuer_7/loss=0.785, rew=201.53]                           


Epoch #23: test_reward: 36.927542 ± 69.987901, best_reward: 50.673875 ± 62.603588 in #22


Epoch #24: 20001it [01:01, 324.71it/s, env_step=480000, len=4000, n/ep=5, n/st=400, pursuer_0/loss=0.539, pursuer_1/loss=0.590, pursuer_2/loss=0.686, pursuer_3/loss=0.551, pursuer_4/loss=0.693, pursuer_5/loss=0.611, pursuer_6/loss=0.744, pursuer_7/loss=0.787, rew=61.29]                            


Epoch #24: test_reward: 36.035000 ± 124.624752, best_reward: 50.673875 ± 62.603588 in #22


Epoch #25: 20001it [01:08, 291.65it/s, env_step=500000, len=1688, n/ep=0, n/st=400, pursuer_0/loss=0.639, pursuer_1/loss=0.635, pursuer_2/loss=0.709, pursuer_3/loss=0.612, pursuer_4/loss=0.731, pursuer_5/loss=0.722, pursuer_6/loss=0.674, pursuer_7/loss=0.873, rew=266.26]                           


Epoch #25: test_reward: 22.263792 ± 75.304980, best_reward: 50.673875 ± 62.603588 in #22


Epoch #26: 20001it [01:09, 287.78it/s, env_step=520000, len=4000, n/ep=3, n/st=400, pursuer_0/loss=0.659, pursuer_1/loss=0.598, pursuer_2/loss=0.614, pursuer_3/loss=0.618, pursuer_4/loss=0.738, pursuer_5/loss=0.738, pursuer_6/loss=0.653, pursuer_7/loss=0.727, rew=86.31]                            


Best Saved Rew 65
Epoch #26: test_reward: 183.522000 ± 126.833571, best_reward: 183.522000 ± 126.833571 in #26


Epoch #27: 20001it [01:08, 289.89it/s, env_step=540000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.648, pursuer_1/loss=0.748, pursuer_2/loss=0.690, pursuer_3/loss=0.735, pursuer_4/loss=0.913, pursuer_5/loss=0.755, pursuer_6/loss=0.765, pursuer_7/loss=0.881, rew=33.62]                           


Epoch #27: test_reward: 128.909042 ± 139.422740, best_reward: 183.522000 ± 126.833571 in #26


Epoch #28: 20001it [01:06, 299.83it/s, env_step=560000, len=4000, n/ep=2, n/st=400, pursuer_0/loss=0.625, pursuer_1/loss=0.601, pursuer_2/loss=0.732, pursuer_3/loss=0.658, pursuer_4/loss=0.763, pursuer_5/loss=0.667, pursuer_6/loss=0.730, pursuer_7/loss=0.786, rew=119.55]                           


Epoch #28: test_reward: 116.691750 ± 125.239318, best_reward: 183.522000 ± 126.833571 in #26


Epoch #29: 20001it [01:08, 292.87it/s, env_step=580000, len=2136, n/ep=0, n/st=400, pursuer_0/loss=0.716, pursuer_1/loss=0.841, pursuer_2/loss=0.760, pursuer_3/loss=0.815, pursuer_4/loss=0.838, pursuer_5/loss=0.839, pursuer_6/loss=0.776, pursuer_7/loss=0.934, rew=472.02]                           


Steps Policy Saved  80
Epoch #29: test_reward: 92.991333 ± 148.592470, best_reward: 183.522000 ± 126.833571 in #26


Epoch #30: 20001it [01:07, 296.16it/s, env_step=600000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=0.668, pursuer_1/loss=0.646, pursuer_2/loss=0.705, pursuer_3/loss=0.723, pursuer_4/loss=0.803, pursuer_5/loss=0.774, pursuer_6/loss=0.835, pursuer_7/loss=0.767, rew=99.90]                            


Epoch #30: test_reward: 91.434000 ± 81.757737, best_reward: 183.522000 ± 126.833571 in #26


Epoch #31: 20001it [01:06, 298.61it/s, env_step=620000, len=3456, n/ep=0, n/st=400, pursuer_0/loss=0.741, pursuer_1/loss=0.704, pursuer_2/loss=0.820, pursuer_3/loss=0.839, pursuer_4/loss=0.759, pursuer_5/loss=0.846, pursuer_6/loss=0.919, pursuer_7/loss=0.977, rew=175.75]                           


Epoch #31: test_reward: 96.236250 ± 125.832425, best_reward: 183.522000 ± 126.833571 in #26


Epoch #32: 20001it [01:07, 295.38it/s, env_step=640000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=0.772, pursuer_1/loss=0.742, pursuer_2/loss=0.794, pursuer_3/loss=0.721, pursuer_4/loss=0.795, pursuer_5/loss=0.788, pursuer_6/loss=0.839, pursuer_7/loss=0.947, rew=143.68]                           


Steps Policy Saved  100
Epoch #32: test_reward: 110.812833 ± 137.655161, best_reward: 183.522000 ± 126.833571 in #26


Epoch #33: 20001it [01:08, 292.11it/s, env_step=660000, len=1576, n/ep=1, n/st=400, pursuer_0/loss=0.809, pursuer_1/loss=0.769, pursuer_2/loss=0.757, pursuer_3/loss=0.805, pursuer_4/loss=0.881, pursuer_5/loss=0.864, pursuer_6/loss=1.040, pursuer_7/loss=0.889, rew=332.02]                           


Epoch #33: test_reward: 180.603750 ± 144.237586, best_reward: 183.522000 ± 126.833571 in #26


Epoch #34: 20001it [01:03, 315.04it/s, env_step=680000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=0.725, pursuer_1/loss=0.815, pursuer_2/loss=0.770, pursuer_3/loss=0.938, pursuer_4/loss=0.790, pursuer_5/loss=0.904, pursuer_6/loss=0.838, pursuer_7/loss=0.944, rew=56.73]                            


Epoch #34: test_reward: 162.140292 ± 116.032927, best_reward: 183.522000 ± 126.833571 in #26


Epoch #35: 20001it [01:03, 316.94it/s, env_step=700000, len=3544, n/ep=0, n/st=400, pursuer_0/loss=0.878, pursuer_1/loss=0.876, pursuer_2/loss=0.885, pursuer_3/loss=0.976, pursuer_4/loss=0.845, pursuer_5/loss=0.914, pursuer_6/loss=0.948, pursuer_7/loss=1.000, rew=122.92]                           


Epoch #35: test_reward: 96.721583 ± 84.729346, best_reward: 183.522000 ± 126.833571 in #26


Epoch #36: 20001it [01:07, 296.76it/s, env_step=720000, len=1744, n/ep=0, n/st=400, pursuer_0/loss=0.877, pursuer_1/loss=0.779, pursuer_2/loss=0.877, pursuer_3/loss=0.963, pursuer_4/loss=0.919, pursuer_5/loss=0.960, pursuer_6/loss=0.988, pursuer_7/loss=0.953, rew=501.32]                           


Epoch #36: test_reward: 142.281583 ± 149.570392, best_reward: 183.522000 ± 126.833571 in #26


Epoch #37: 20001it [01:06, 298.69it/s, env_step=740000, len=3544, n/ep=0, n/st=400, pursuer_0/loss=0.821, pursuer_1/loss=0.896, pursuer_2/loss=0.936, pursuer_3/loss=1.088, pursuer_4/loss=1.071, pursuer_5/loss=0.950, pursuer_6/loss=1.210, pursuer_7/loss=1.050, rew=152.63]                           


Epoch #37: test_reward: 121.947167 ± 88.041789, best_reward: 183.522000 ± 126.833571 in #26


Epoch #38: 20001it [01:07, 298.16it/s, env_step=760000, len=3272, n/ep=0, n/st=400, pursuer_0/loss=0.967, pursuer_1/loss=1.003, pursuer_2/loss=0.949, pursuer_3/loss=1.084, pursuer_4/loss=0.923, pursuer_5/loss=1.042, pursuer_6/loss=1.117, pursuer_7/loss=1.185, rew=130.03]                           


Epoch #38: test_reward: 55.780708 ± 110.512813, best_reward: 183.522000 ± 126.833571 in #26


Epoch #39: 20001it [01:07, 295.38it/s, env_step=780000, len=3944, n/ep=0, n/st=400, pursuer_0/loss=0.816, pursuer_1/loss=1.035, pursuer_2/loss=1.077, pursuer_3/loss=1.064, pursuer_4/loss=0.925, pursuer_5/loss=0.910, pursuer_6/loss=1.024, pursuer_7/loss=1.009, rew=105.86]                           


Best Saved Rew 158
Epoch #39: test_reward: 219.641167 ± 151.464216, best_reward: 219.641167 ± 151.464216 in #39


Epoch #40: 20001it [01:07, 295.45it/s, env_step=800000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.922, pursuer_1/loss=1.018, pursuer_2/loss=1.058, pursuer_3/loss=0.974, pursuer_4/loss=0.885, pursuer_5/loss=0.995, pursuer_6/loss=1.239, pursuer_7/loss=0.992, rew=86.30]                            


Epoch #40: test_reward: 134.761375 ± 141.144011, best_reward: 219.641167 ± 151.464216 in #39


Epoch #41: 20001it [01:07, 295.51it/s, env_step=820000, len=2568, n/ep=0, n/st=400, pursuer_0/loss=1.082, pursuer_1/loss=1.024, pursuer_2/loss=1.191, pursuer_3/loss=1.140, pursuer_4/loss=1.151, pursuer_5/loss=1.154, pursuer_6/loss=1.237, pursuer_7/loss=1.079, rew=298.64]                           


Epoch #41: test_reward: 66.725125 ± 99.040437, best_reward: 219.641167 ± 151.464216 in #39


Epoch #42: 20001it [01:07, 296.59it/s, env_step=840000, len=3400, n/ep=0, n/st=400, pursuer_0/loss=0.965, pursuer_1/loss=0.994, pursuer_2/loss=1.032, pursuer_3/loss=0.966, pursuer_4/loss=0.990, pursuer_5/loss=1.133, pursuer_6/loss=1.133, pursuer_7/loss=1.054, rew=122.02]                           


Steps Policy Saved  180
Epoch #42: test_reward: 129.746000 ± 138.763859, best_reward: 219.641167 ± 151.464216 in #39


Epoch #43: 20001it [01:07, 297.40it/s, env_step=860000, len=1668, n/ep=0, n/st=400, pursuer_0/loss=0.963, pursuer_1/loss=1.075, pursuer_2/loss=1.024, pursuer_3/loss=1.078, pursuer_4/loss=1.041, pursuer_5/loss=1.064, pursuer_6/loss=1.173, pursuer_7/loss=1.019, rew=365.77]                           


Epoch #43: test_reward: 106.720667 ± 109.886474, best_reward: 219.641167 ± 151.464216 in #39


Epoch #44: 20001it [01:06, 301.15it/s, env_step=880000, len=2936, n/ep=0, n/st=400, pursuer_0/loss=0.957, pursuer_1/loss=1.031, pursuer_2/loss=0.876, pursuer_3/loss=0.958, pursuer_4/loss=1.038, pursuer_5/loss=1.042, pursuer_6/loss=0.992, pursuer_7/loss=0.981, rew=178.01]                           


Epoch #44: test_reward: 144.942500 ± 135.797506, best_reward: 219.641167 ± 151.464216 in #39


Epoch #45: 20001it [01:07, 296.35it/s, env_step=900000, len=1744, n/ep=0, n/st=400, pursuer_0/loss=0.955, pursuer_1/loss=0.995, pursuer_2/loss=1.118, pursuer_3/loss=1.102, pursuer_4/loss=1.097, pursuer_5/loss=1.121, pursuer_6/loss=1.165, pursuer_7/loss=1.134, rew=414.78]                           


Best Saved Rew 202
Epoch #45: test_reward: 292.098417 ± 161.802681, best_reward: 292.098417 ± 161.802681 in #45


Epoch #46: 20001it [01:06, 299.19it/s, env_step=920000, len=1712, n/ep=0, n/st=400, pursuer_0/loss=0.983, pursuer_1/loss=0.994, pursuer_2/loss=1.135, pursuer_3/loss=1.070, pursuer_4/loss=1.039, pursuer_5/loss=1.072, pursuer_6/loss=1.133, pursuer_7/loss=1.211, rew=289.44]                           


Epoch #46: test_reward: 209.638125 ± 127.188349, best_reward: 292.098417 ± 161.802681 in #45


Epoch #47: 20001it [01:06, 299.55it/s, env_step=940000, len=1848, n/ep=0, n/st=400, pursuer_0/loss=1.027, pursuer_1/loss=1.085, pursuer_2/loss=1.213, pursuer_3/loss=1.206, pursuer_4/loss=1.023, pursuer_5/loss=1.181, pursuer_6/loss=1.307, pursuer_7/loss=1.165, rew=539.89]                           


Epoch #47: test_reward: 236.621833 ± 143.542360, best_reward: 292.098417 ± 161.802681 in #45


Epoch #48: 20001it [01:08, 293.20it/s, env_step=960000, len=1688, n/ep=0, n/st=400, pursuer_0/loss=0.989, pursuer_1/loss=1.118, pursuer_2/loss=1.200, pursuer_3/loss=1.266, pursuer_4/loss=1.064, pursuer_5/loss=1.112, pursuer_6/loss=1.276, pursuer_7/loss=1.232, rew=411.68]                           


Epoch #48: test_reward: 82.657792 ± 116.469883, best_reward: 292.098417 ± 161.802681 in #45


Epoch #49: 20001it [01:06, 301.99it/s, env_step=980000, len=1452, n/ep=0, n/st=400, pursuer_0/loss=1.192, pursuer_1/loss=1.204, pursuer_2/loss=1.312, pursuer_3/loss=1.088, pursuer_4/loss=1.142, pursuer_5/loss=1.322, pursuer_6/loss=1.427, pursuer_7/loss=1.455, rew=392.24]                           


Epoch #49: test_reward: 243.455708 ± 149.900158, best_reward: 292.098417 ± 161.802681 in #45


Epoch #50: 20001it [01:04, 311.07it/s, env_step=1000000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.057, pursuer_1/loss=1.223, pursuer_2/loss=1.194, pursuer_3/loss=1.328, pursuer_4/loss=1.162, pursuer_5/loss=1.236, pursuer_6/loss=1.456, pursuer_7/loss=1.427, rew=99.23]                           


Epoch #50: test_reward: 136.091375 ± 136.700255, best_reward: 292.098417 ± 161.802681 in #45


Epoch #51: 20001it [01:02, 317.96it/s, env_step=1020000, len=3968, n/ep=0, n/st=400, pursuer_0/loss=1.106, pursuer_1/loss=1.162, pursuer_2/loss=1.199, pursuer_3/loss=1.119, pursuer_4/loss=1.143, pursuer_5/loss=1.321, pursuer_6/loss=1.228, pursuer_7/loss=1.148, rew=127.22]                           


Epoch #51: test_reward: 203.570542 ± 186.246524, best_reward: 292.098417 ± 161.802681 in #45


Epoch #52: 20001it [01:01, 322.67it/s, env_step=1040000, len=2464, n/ep=0, n/st=400, pursuer_0/loss=1.007, pursuer_1/loss=1.194, pursuer_2/loss=1.148, pursuer_3/loss=1.172, pursuer_4/loss=1.148, pursuer_5/loss=1.223, pursuer_6/loss=1.423, pursuer_7/loss=1.063, rew=294.06]                           


Epoch #52: test_reward: 233.299208 ± 188.934178, best_reward: 292.098417 ± 161.802681 in #45


Epoch #53: 20001it [01:01, 325.52it/s, env_step=1060000, len=1272, n/ep=0, n/st=400, pursuer_0/loss=1.026, pursuer_1/loss=1.275, pursuer_2/loss=1.344, pursuer_3/loss=1.320, pursuer_4/loss=1.212, pursuer_5/loss=1.290, pursuer_6/loss=1.376, pursuer_7/loss=1.335, rew=442.14]                           


Epoch #53: test_reward: 119.639292 ± 99.445055, best_reward: 292.098417 ± 161.802681 in #45


Epoch #54: 20001it [01:00, 329.27it/s, env_step=1080000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.206, pursuer_1/loss=1.331, pursuer_2/loss=1.406, pursuer_3/loss=1.490, pursuer_4/loss=1.144, pursuer_5/loss=1.546, pursuer_6/loss=1.395, pursuer_7/loss=1.440, rew=154.70]                           


Steps Policy Saved  280
Epoch #54: test_reward: 254.963125 ± 138.060162, best_reward: 292.098417 ± 161.802681 in #45


Epoch #55: 20001it [01:00, 328.25it/s, env_step=1100000, len=2064, n/ep=1, n/st=400, pursuer_0/loss=1.131, pursuer_1/loss=1.316, pursuer_2/loss=1.313, pursuer_3/loss=1.346, pursuer_4/loss=1.168, pursuer_5/loss=1.322, pursuer_6/loss=1.287, pursuer_7/loss=1.347, rew=443.69]                           


Steps Policy Saved  290
Epoch #55: test_reward: 168.538917 ± 202.489535, best_reward: 292.098417 ± 161.802681 in #45


Epoch #56: 20001it [00:59, 338.16it/s, env_step=1120000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.112, pursuer_1/loss=1.171, pursuer_2/loss=1.329, pursuer_3/loss=1.350, pursuer_4/loss=1.190, pursuer_5/loss=1.292, pursuer_6/loss=1.350, pursuer_7/loss=1.286, rew=159.58]                           


Epoch #56: test_reward: 133.390375 ± 125.411556, best_reward: 292.098417 ± 161.802681 in #45


Epoch #57: 20001it [00:59, 338.02it/s, env_step=1140000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.264, pursuer_1/loss=1.391, pursuer_2/loss=1.394, pursuer_3/loss=1.527, pursuer_4/loss=1.332, pursuer_5/loss=1.471, pursuer_6/loss=1.379, pursuer_7/loss=1.352, rew=204.36]                           


Epoch #57: test_reward: 236.202042 ± 125.531296, best_reward: 292.098417 ± 161.802681 in #45


Epoch #58: 20001it [00:58, 340.84it/s, env_step=1160000, len=3584, n/ep=0, n/st=400, pursuer_0/loss=1.132, pursuer_1/loss=1.264, pursuer_2/loss=1.210, pursuer_3/loss=1.265, pursuer_4/loss=1.232, pursuer_5/loss=1.407, pursuer_6/loss=1.250, pursuer_7/loss=1.301, rew=167.80]                           


Epoch #58: test_reward: 106.914167 ± 109.230483, best_reward: 292.098417 ± 161.802681 in #45


Epoch #59: 20001it [01:00, 331.81it/s, env_step=1180000, len=1728, n/ep=0, n/st=400, pursuer_0/loss=1.179, pursuer_1/loss=1.116, pursuer_2/loss=1.196, pursuer_3/loss=1.259, pursuer_4/loss=1.193, pursuer_5/loss=1.337, pursuer_6/loss=1.258, pursuer_7/loss=1.368, rew=432.63]                           


Epoch #59: test_reward: 158.009667 ± 115.144171, best_reward: 292.098417 ± 161.802681 in #45


Epoch #60: 20001it [00:58, 339.36it/s, env_step=1200000, len=1968, n/ep=0, n/st=400, pursuer_0/loss=1.058, pursuer_1/loss=1.240, pursuer_2/loss=1.175, pursuer_3/loss=1.360, pursuer_4/loss=1.239, pursuer_5/loss=1.256, pursuer_6/loss=1.236, pursuer_7/loss=1.338, rew=487.40]                           


Epoch #60: test_reward: 211.982167 ± 180.997512, best_reward: 292.098417 ± 161.802681 in #45


Epoch #61: 20001it [00:59, 335.46it/s, env_step=1220000, len=1448, n/ep=0, n/st=400, pursuer_0/loss=1.222, pursuer_1/loss=1.364, pursuer_2/loss=1.366, pursuer_3/loss=1.375, pursuer_4/loss=1.313, pursuer_5/loss=1.446, pursuer_6/loss=1.399, pursuer_7/loss=1.515, rew=368.97]                           


Steps Policy Saved  340
Epoch #61: test_reward: 141.229583 ± 119.265622, best_reward: 292.098417 ± 161.802681 in #45


Epoch #62: 20001it [01:00, 333.16it/s, env_step=1240000, len=824, n/ep=0, n/st=400, pursuer_0/loss=1.079, pursuer_1/loss=1.193, pursuer_2/loss=1.193, pursuer_3/loss=1.171, pursuer_4/loss=1.172, pursuer_5/loss=1.287, pursuer_6/loss=1.257, pursuer_7/loss=1.326, rew=456.27]                            


Epoch #62: test_reward: 129.589708 ± 119.887114, best_reward: 292.098417 ± 161.802681 in #45


Epoch #63: 20001it [00:58, 344.15it/s, env_step=1260000, len=1648, n/ep=0, n/st=400, pursuer_0/loss=1.152, pursuer_1/loss=1.218, pursuer_2/loss=1.217, pursuer_3/loss=1.294, pursuer_4/loss=1.217, pursuer_5/loss=1.239, pursuer_6/loss=1.232, pursuer_7/loss=1.261, rew=415.05]                           


Epoch #63: test_reward: 291.101625 ± 195.036061, best_reward: 292.098417 ± 161.802681 in #45


Epoch #64: 20001it [00:59, 335.25it/s, env_step=1280000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.119, pursuer_1/loss=1.290, pursuer_2/loss=1.249, pursuer_3/loss=1.304, pursuer_4/loss=1.113, pursuer_5/loss=1.311, pursuer_6/loss=1.392, pursuer_7/loss=1.463, rew=296.43]                           


Epoch #64: test_reward: 206.443000 ± 138.667678, best_reward: 292.098417 ± 161.802681 in #45


Epoch #65: 20001it [00:57, 346.32it/s, env_step=1300000, len=2280, n/ep=0, n/st=400, pursuer_0/loss=1.070, pursuer_1/loss=1.228, pursuer_2/loss=1.112, pursuer_3/loss=1.237, pursuer_4/loss=1.152, pursuer_5/loss=1.170, pursuer_6/loss=1.255, pursuer_7/loss=1.269, rew=311.67]                           


Epoch #65: test_reward: 235.082625 ± 154.629670, best_reward: 292.098417 ± 161.802681 in #45


Epoch #66: 20001it [00:59, 337.19it/s, env_step=1320000, len=3704, n/ep=0, n/st=400, pursuer_0/loss=1.083, pursuer_1/loss=1.280, pursuer_2/loss=1.141, pursuer_3/loss=1.208, pursuer_4/loss=1.086, pursuer_5/loss=1.272, pursuer_6/loss=1.176, pursuer_7/loss=1.327, rew=164.19]                           


Epoch #66: test_reward: 225.314667 ± 172.812246, best_reward: 292.098417 ± 161.802681 in #45


Epoch #67: 20001it [00:58, 341.41it/s, env_step=1340000, len=1720, n/ep=0, n/st=400, pursuer_0/loss=1.108, pursuer_1/loss=1.204, pursuer_2/loss=1.330, pursuer_3/loss=1.406, pursuer_4/loss=1.186, pursuer_5/loss=1.337, pursuer_6/loss=1.268, pursuer_7/loss=1.304, rew=328.28]                           


Epoch #67: test_reward: 287.817250 ± 160.651752, best_reward: 292.098417 ± 161.802681 in #45


Epoch #68: 20001it [00:58, 339.23it/s, env_step=1360000, len=3232, n/ep=0, n/st=400, pursuer_0/loss=0.988, pursuer_1/loss=1.125, pursuer_2/loss=1.274, pursuer_3/loss=1.259, pursuer_4/loss=1.080, pursuer_5/loss=1.289, pursuer_6/loss=1.182, pursuer_7/loss=1.279, rew=273.90]                           


Best Saved Rew 394
Epoch #68: test_reward: 339.951750 ± 205.388751, best_reward: 339.951750 ± 205.388751 in #68


Epoch #69: 20001it [00:59, 334.39it/s, env_step=1380000, len=1456, n/ep=1, n/st=400, pursuer_0/loss=1.098, pursuer_1/loss=1.116, pursuer_2/loss=1.203, pursuer_3/loss=1.265, pursuer_4/loss=1.043, pursuer_5/loss=1.208, pursuer_6/loss=1.218, pursuer_7/loss=1.261, rew=415.28]                           


Epoch #69: test_reward: 130.329458 ± 116.799738, best_reward: 339.951750 ± 205.388751 in #68


Epoch #70: 20001it [00:59, 334.33it/s, env_step=1400000, len=1728, n/ep=0, n/st=400, pursuer_0/loss=0.989, pursuer_1/loss=1.059, pursuer_2/loss=1.140, pursuer_3/loss=1.173, pursuer_4/loss=1.071, pursuer_5/loss=1.101, pursuer_6/loss=1.198, pursuer_7/loss=1.284, rew=391.58]                           


Steps Policy Saved  410
Best Saved Rew 411
Epoch #70: test_reward: 344.593333 ± 209.315791, best_reward: 344.593333 ± 209.315791 in #70


Epoch #71: 20001it [01:00, 329.78it/s, env_step=1420000, len=2384, n/ep=0, n/st=400, pursuer_0/loss=1.022, pursuer_1/loss=1.074, pursuer_2/loss=1.033, pursuer_3/loss=1.263, pursuer_4/loss=1.128, pursuer_5/loss=1.302, pursuer_6/loss=1.217, pursuer_7/loss=1.361, rew=301.87]                           


Steps Policy Saved  420
Epoch #71: test_reward: 323.221250 ± 150.799500, best_reward: 344.593333 ± 209.315791 in #70


Epoch #72: 20001it [01:00, 330.74it/s, env_step=1440000, len=2288, n/ep=0, n/st=400, pursuer_0/loss=1.133, pursuer_1/loss=1.175, pursuer_2/loss=1.262, pursuer_3/loss=1.394, pursuer_4/loss=1.229, pursuer_5/loss=1.285, pursuer_6/loss=1.380, pursuer_7/loss=1.300, rew=316.97]                           


Epoch #72: test_reward: 230.709250 ± 168.794096, best_reward: 344.593333 ± 209.315791 in #70


Epoch #73: 20001it [01:02, 321.28it/s, env_step=1460000, len=2824, n/ep=0, n/st=400, pursuer_0/loss=1.029, pursuer_1/loss=1.111, pursuer_2/loss=1.234, pursuer_3/loss=1.146, pursuer_4/loss=1.095, pursuer_5/loss=1.168, pursuer_6/loss=1.208, pursuer_7/loss=1.289, rew=208.34]                           


Epoch #73: test_reward: 179.788250 ± 211.291385, best_reward: 344.593333 ± 209.315791 in #70


Epoch #74: 20001it [01:03, 314.73it/s, env_step=1480000, len=928, n/ep=0, n/st=400, pursuer_0/loss=1.113, pursuer_1/loss=1.221, pursuer_2/loss=1.163, pursuer_3/loss=1.250, pursuer_4/loss=1.070, pursuer_5/loss=1.161, pursuer_6/loss=1.222, pursuer_7/loss=1.300, rew=470.89]                            


Epoch #74: test_reward: 233.622542 ± 177.366532, best_reward: 344.593333 ± 209.315791 in #70


Epoch #75: 20001it [01:07, 294.28it/s, env_step=1500000, len=1880, n/ep=0, n/st=400, pursuer_0/loss=1.206, pursuer_1/loss=1.156, pursuer_2/loss=1.232, pursuer_3/loss=1.178, pursuer_4/loss=1.066, pursuer_5/loss=1.289, pursuer_6/loss=1.357, pursuer_7/loss=1.284, rew=410.64]                           


Epoch #75: test_reward: 273.068333 ± 194.381720, best_reward: 344.593333 ± 209.315791 in #70


Epoch #76: 20001it [01:07, 297.10it/s, env_step=1520000, len=2584, n/ep=0, n/st=400, pursuer_0/loss=1.143, pursuer_1/loss=1.171, pursuer_2/loss=1.326, pursuer_3/loss=1.172, pursuer_4/loss=1.231, pursuer_5/loss=1.234, pursuer_6/loss=1.375, pursuer_7/loss=1.372, rew=305.87]                           


Epoch #76: test_reward: 202.020917 ± 150.323196, best_reward: 344.593333 ± 209.315791 in #70


Epoch #77: 20001it [01:07, 298.11it/s, env_step=1540000, len=1032, n/ep=0, n/st=400, pursuer_0/loss=1.154, pursuer_1/loss=1.064, pursuer_2/loss=1.192, pursuer_3/loss=1.188, pursuer_4/loss=1.133, pursuer_5/loss=1.219, pursuer_6/loss=1.166, pursuer_7/loss=1.260, rew=485.11]                           


Epoch #77: test_reward: 167.653667 ± 180.082567, best_reward: 344.593333 ± 209.315791 in #70


Epoch #78: 20001it [01:06, 298.86it/s, env_step=1560000, len=3856, n/ep=0, n/st=400, pursuer_0/loss=1.160, pursuer_1/loss=1.114, pursuer_2/loss=1.232, pursuer_3/loss=1.342, pursuer_4/loss=1.143, pursuer_5/loss=1.311, pursuer_6/loss=1.228, pursuer_7/loss=1.236, rew=160.42]                           


Epoch #78: test_reward: 204.491792 ± 151.035404, best_reward: 344.593333 ± 209.315791 in #70


Epoch #79: 20001it [01:06, 301.13it/s, env_step=1580000, len=3808, n/ep=0, n/st=400, pursuer_0/loss=1.310, pursuer_1/loss=1.185, pursuer_2/loss=1.218, pursuer_3/loss=1.318, pursuer_4/loss=1.274, pursuer_5/loss=1.219, pursuer_6/loss=1.317, pursuer_7/loss=1.203, rew=379.64]                           


Epoch #79: test_reward: 263.334000 ± 175.806037, best_reward: 344.593333 ± 209.315791 in #70


Epoch #80: 20001it [01:06, 302.08it/s, env_step=1600000, len=1976, n/ep=1, n/st=400, pursuer_0/loss=1.171, pursuer_1/loss=1.066, pursuer_2/loss=1.123, pursuer_3/loss=1.168, pursuer_4/loss=1.085, pursuer_5/loss=1.134, pursuer_6/loss=1.204, pursuer_7/loss=1.075, rew=508.09]                           


Epoch #80: test_reward: 277.256792 ± 185.419484, best_reward: 344.593333 ± 209.315791 in #70


Epoch #81: 20001it [01:07, 297.98it/s, env_step=1620000, len=2408, n/ep=0, n/st=400, pursuer_0/loss=1.060, pursuer_1/loss=1.265, pursuer_2/loss=1.189, pursuer_3/loss=1.171, pursuer_4/loss=1.220, pursuer_5/loss=1.246, pursuer_6/loss=1.305, pursuer_7/loss=1.246, rew=284.60]                           


Epoch #81: test_reward: 243.033000 ± 127.804400, best_reward: 344.593333 ± 209.315791 in #70


Epoch #82: 20001it [01:05, 306.07it/s, env_step=1640000, len=2240, n/ep=0, n/st=400, pursuer_0/loss=1.134, pursuer_1/loss=1.211, pursuer_2/loss=1.281, pursuer_3/loss=1.101, pursuer_4/loss=1.221, pursuer_5/loss=1.140, pursuer_6/loss=1.273, pursuer_7/loss=1.319, rew=260.19]                           


Epoch #82: test_reward: 225.658792 ± 163.446101, best_reward: 344.593333 ± 209.315791 in #70


Epoch #83: 20001it [01:05, 305.06it/s, env_step=1660000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.196, pursuer_1/loss=1.117, pursuer_2/loss=1.155, pursuer_3/loss=1.311, pursuer_4/loss=1.129, pursuer_5/loss=1.170, pursuer_6/loss=1.301, pursuer_7/loss=1.297, rew=60.72]                            


Epoch #83: test_reward: 245.520042 ± 168.895814, best_reward: 344.593333 ± 209.315791 in #70


Epoch #84: 20001it [01:06, 301.04it/s, env_step=1680000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.194, pursuer_1/loss=1.149, pursuer_2/loss=1.130, pursuer_3/loss=1.157, pursuer_4/loss=1.066, pursuer_5/loss=0.973, pursuer_6/loss=1.172, pursuer_7/loss=1.233, rew=264.46]                           


Epoch #84: test_reward: 259.973958 ± 139.867807, best_reward: 344.593333 ± 209.315791 in #70


Epoch #85: 20001it [01:05, 307.39it/s, env_step=1700000, len=3080, n/ep=0, n/st=400, pursuer_0/loss=1.148, pursuer_1/loss=1.171, pursuer_2/loss=1.231, pursuer_3/loss=1.197, pursuer_4/loss=1.201, pursuer_5/loss=1.162, pursuer_6/loss=1.184, pursuer_7/loss=1.251, rew=324.38]                           


Epoch #85: test_reward: 325.702667 ± 222.177550, best_reward: 344.593333 ± 209.315791 in #70


Epoch #86: 20001it [01:05, 303.51it/s, env_step=1720000, len=2088, n/ep=0, n/st=400, pursuer_0/loss=1.145, pursuer_1/loss=1.044, pursuer_2/loss=1.134, pursuer_3/loss=1.166, pursuer_4/loss=1.097, pursuer_5/loss=1.235, pursuer_6/loss=1.344, pursuer_7/loss=1.255, rew=322.27]                           


Epoch #86: test_reward: 151.541458 ± 143.178812, best_reward: 344.593333 ± 209.315791 in #70


Epoch #87: 20001it [01:07, 297.46it/s, env_step=1740000, len=1312, n/ep=0, n/st=400, pursuer_0/loss=1.000, pursuer_1/loss=1.091, pursuer_2/loss=1.218, pursuer_3/loss=1.119, pursuer_4/loss=1.016, pursuer_5/loss=1.194, pursuer_6/loss=1.213, pursuer_7/loss=1.284, rew=412.32]                           


Epoch #87: test_reward: 202.566000 ± 126.059879, best_reward: 344.593333 ± 209.315791 in #70


Epoch #88: 20001it [01:05, 305.01it/s, env_step=1760000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.150, pursuer_1/loss=0.981, pursuer_2/loss=1.122, pursuer_3/loss=1.197, pursuer_4/loss=1.023, pursuer_5/loss=1.169, pursuer_6/loss=1.260, pursuer_7/loss=1.149, rew=81.79]                            


Epoch #88: test_reward: 242.163542 ± 154.688764, best_reward: 344.593333 ± 209.315791 in #70


Epoch #89: 20001it [01:07, 294.47it/s, env_step=1780000, len=1376, n/ep=0, n/st=400, pursuer_0/loss=1.017, pursuer_1/loss=1.054, pursuer_2/loss=1.123, pursuer_3/loss=1.061, pursuer_4/loss=1.028, pursuer_5/loss=1.074, pursuer_6/loss=1.117, pursuer_7/loss=1.089, rew=435.71]                           


Epoch #89: test_reward: 275.406958 ± 185.871253, best_reward: 344.593333 ± 209.315791 in #70


Epoch #90: 20001it [01:06, 299.52it/s, env_step=1800000, len=1344, n/ep=0, n/st=400, pursuer_0/loss=0.976, pursuer_1/loss=1.182, pursuer_2/loss=1.121, pursuer_3/loss=1.274, pursuer_4/loss=1.130, pursuer_5/loss=1.188, pursuer_6/loss=1.333, pursuer_7/loss=1.219, rew=479.71]                           


Steps Policy Saved  570
Epoch #90: test_reward: 220.865667 ± 164.138117, best_reward: 344.593333 ± 209.315791 in #70


Epoch #91: 20001it [01:06, 302.82it/s, env_step=1820000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.157, pursuer_1/loss=1.118, pursuer_2/loss=1.207, pursuer_3/loss=1.128, pursuer_4/loss=1.183, pursuer_5/loss=1.148, pursuer_6/loss=1.281, pursuer_7/loss=1.311, rew=191.13]                           


Epoch #91: test_reward: 213.410542 ± 155.246813, best_reward: 344.593333 ± 209.315791 in #70


Epoch #92: 20001it [01:05, 305.03it/s, env_step=1840000, len=2080, n/ep=0, n/st=400, pursuer_0/loss=1.042, pursuer_1/loss=1.175, pursuer_2/loss=1.102, pursuer_3/loss=1.214, pursuer_4/loss=1.154, pursuer_5/loss=1.206, pursuer_6/loss=1.202, pursuer_7/loss=1.240, rew=441.93]                           


Epoch #92: test_reward: 305.845042 ± 186.313413, best_reward: 344.593333 ± 209.315791 in #70


Epoch #93: 20001it [01:07, 296.51it/s, env_step=1860000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.086, pursuer_1/loss=1.194, pursuer_2/loss=1.222, pursuer_3/loss=1.187, pursuer_4/loss=1.214, pursuer_5/loss=1.185, pursuer_6/loss=1.206, pursuer_7/loss=1.279, rew=123.99]                           


Epoch #93: test_reward: 207.881333 ± 132.765622, best_reward: 344.593333 ± 209.315791 in #70


Epoch #94: 20001it [01:05, 304.94it/s, env_step=1880000, len=1424, n/ep=0, n/st=400, pursuer_0/loss=0.970, pursuer_1/loss=1.054, pursuer_2/loss=1.034, pursuer_3/loss=1.156, pursuer_4/loss=1.123, pursuer_5/loss=1.201, pursuer_6/loss=1.207, pursuer_7/loss=1.048, rew=472.90]                           


Epoch #94: test_reward: 198.143042 ± 174.926736, best_reward: 344.593333 ± 209.315791 in #70


Epoch #95: 20001it [01:06, 301.22it/s, env_step=1900000, len=1760, n/ep=1, n/st=400, pursuer_0/loss=0.996, pursuer_1/loss=1.098, pursuer_2/loss=1.147, pursuer_3/loss=1.171, pursuer_4/loss=1.025, pursuer_5/loss=1.193, pursuer_6/loss=1.291, pursuer_7/loss=1.163, rew=354.21]                           


Epoch #95: test_reward: 264.344833 ± 178.922671, best_reward: 344.593333 ± 209.315791 in #70


Epoch #96: 20001it [01:06, 302.32it/s, env_step=1920000, len=3768, n/ep=0, n/st=400, pursuer_0/loss=1.056, pursuer_1/loss=1.099, pursuer_2/loss=1.122, pursuer_3/loss=1.072, pursuer_4/loss=1.062, pursuer_5/loss=1.170, pursuer_6/loss=1.108, pursuer_7/loss=1.086, rew=130.88]                           


Epoch #96: test_reward: 312.670667 ± 221.448181, best_reward: 344.593333 ± 209.315791 in #70


Epoch #97: 20001it [01:06, 301.85it/s, env_step=1940000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.130, pursuer_1/loss=1.148, pursuer_2/loss=1.157, pursuer_3/loss=1.084, pursuer_4/loss=1.069, pursuer_5/loss=1.161, pursuer_6/loss=1.152, pursuer_7/loss=1.050, rew=104.29]                           


Epoch #97: test_reward: 310.070167 ± 150.330663, best_reward: 344.593333 ± 209.315791 in #70


Epoch #98: 20001it [01:07, 296.75it/s, env_step=1960000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.039, pursuer_1/loss=1.139, pursuer_2/loss=1.076, pursuer_3/loss=1.076, pursuer_4/loss=1.206, pursuer_5/loss=1.098, pursuer_6/loss=1.202, pursuer_7/loss=1.095, rew=269.31]                           


Epoch #98: test_reward: 230.592917 ± 150.737885, best_reward: 344.593333 ± 209.315791 in #70


Epoch #99: 20001it [01:07, 297.93it/s, env_step=1980000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.097, pursuer_1/loss=1.296, pursuer_2/loss=1.134, pursuer_3/loss=1.192, pursuer_4/loss=1.312, pursuer_5/loss=1.220, pursuer_6/loss=1.277, pursuer_7/loss=1.163, rew=75.84]                            


Epoch #99: test_reward: 281.287833 ± 156.653868, best_reward: 344.593333 ± 209.315791 in #70


Epoch #100: 20001it [01:07, 298.42it/s, env_step=2000000, len=1272, n/ep=0, n/st=400, pursuer_0/loss=1.104, pursuer_1/loss=1.228, pursuer_2/loss=1.117, pursuer_3/loss=1.429, pursuer_4/loss=1.191, pursuer_5/loss=1.177, pursuer_6/loss=1.216, pursuer_7/loss=1.287, rew=511.39]                           


Steps Policy Saved  650
Epoch #100: test_reward: 235.087208 ± 148.016187, best_reward: 344.593333 ± 209.315791 in #70


Epoch #101: 20001it [01:05, 304.52it/s, env_step=2020000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.976, pursuer_1/loss=1.148, pursuer_2/loss=1.122, pursuer_3/loss=1.192, pursuer_4/loss=1.124, pursuer_5/loss=1.189, pursuer_6/loss=1.162, pursuer_7/loss=1.225, rew=71.24]                            


Epoch #101: test_reward: 251.561958 ± 161.504209, best_reward: 344.593333 ± 209.315791 in #70


Epoch #102: 20001it [01:08, 294.12it/s, env_step=2040000, len=1960, n/ep=1, n/st=400, pursuer_0/loss=1.118, pursuer_1/loss=1.107, pursuer_2/loss=1.120, pursuer_3/loss=1.103, pursuer_4/loss=1.065, pursuer_5/loss=1.192, pursuer_6/loss=1.080, pursuer_7/loss=1.212, rew=417.16]                           


Epoch #102: test_reward: 212.174000 ± 145.749188, best_reward: 344.593333 ± 209.315791 in #70


Epoch #103: 20001it [01:06, 300.51it/s, env_step=2060000, len=2720, n/ep=0, n/st=400, pursuer_0/loss=1.162, pursuer_1/loss=1.208, pursuer_2/loss=1.223, pursuer_3/loss=1.275, pursuer_4/loss=1.167, pursuer_5/loss=1.233, pursuer_6/loss=1.211, pursuer_7/loss=1.121, rew=241.42]                           


Epoch #103: test_reward: 247.064750 ± 147.063671, best_reward: 344.593333 ± 209.315791 in #70


Epoch #104: 20001it [01:06, 300.58it/s, env_step=2080000, len=1488, n/ep=0, n/st=400, pursuer_0/loss=1.119, pursuer_1/loss=1.244, pursuer_2/loss=1.269, pursuer_3/loss=1.391, pursuer_4/loss=1.164, pursuer_5/loss=1.337, pursuer_6/loss=1.236, pursuer_7/loss=1.222, rew=416.68]                           


Epoch #104: test_reward: 251.130667 ± 180.904848, best_reward: 344.593333 ± 209.315791 in #70


Epoch #105: 20001it [01:06, 302.23it/s, env_step=2100000, len=2304, n/ep=0, n/st=400, pursuer_0/loss=1.083, pursuer_1/loss=1.075, pursuer_2/loss=1.262, pursuer_3/loss=1.205, pursuer_4/loss=1.002, pursuer_5/loss=1.183, pursuer_6/loss=1.257, pursuer_7/loss=1.235, rew=242.31]                           


Epoch #105: test_reward: 174.799417 ± 144.408956, best_reward: 344.593333 ± 209.315791 in #70


Epoch #106: 20001it [01:07, 296.72it/s, env_step=2120000, len=2256, n/ep=0, n/st=400, pursuer_0/loss=1.023, pursuer_1/loss=1.156, pursuer_2/loss=1.248, pursuer_3/loss=1.174, pursuer_4/loss=1.074, pursuer_5/loss=1.110, pursuer_6/loss=1.230, pursuer_7/loss=1.286, rew=260.02]                           


Epoch #106: test_reward: 243.563833 ± 137.793841, best_reward: 344.593333 ± 209.315791 in #70


Epoch #107: 20001it [01:08, 293.86it/s, env_step=2140000, len=1432, n/ep=0, n/st=400, pursuer_0/loss=1.163, pursuer_1/loss=1.102, pursuer_2/loss=1.218, pursuer_3/loss=1.214, pursuer_4/loss=1.206, pursuer_5/loss=1.255, pursuer_6/loss=1.174, pursuer_7/loss=1.307, rew=531.53]                           


Epoch #107: test_reward: 220.526083 ± 140.767521, best_reward: 344.593333 ± 209.315791 in #70


Epoch #108: 20001it [01:07, 295.89it/s, env_step=2160000, len=1912, n/ep=0, n/st=400, pursuer_0/loss=1.232, pursuer_1/loss=1.257, pursuer_2/loss=1.279, pursuer_3/loss=1.255, pursuer_4/loss=1.265, pursuer_5/loss=1.253, pursuer_6/loss=1.287, pursuer_7/loss=1.386, rew=423.42]                           


Epoch #108: test_reward: 208.620833 ± 146.423906, best_reward: 344.593333 ± 209.315791 in #70


Epoch #109: 20001it [01:07, 296.26it/s, env_step=2180000, len=1912, n/ep=0, n/st=400, pursuer_0/loss=1.148, pursuer_1/loss=1.119, pursuer_2/loss=1.171, pursuer_3/loss=1.314, pursuer_4/loss=1.242, pursuer_5/loss=1.351, pursuer_6/loss=1.389, pursuer_7/loss=1.270, rew=388.54]                           


Steps Policy Saved  730
Epoch #109: test_reward: 246.167792 ± 177.320394, best_reward: 344.593333 ± 209.315791 in #70


Epoch #110: 20001it [01:06, 302.65it/s, env_step=2200000, len=3256, n/ep=0, n/st=400, pursuer_0/loss=1.230, pursuer_1/loss=1.146, pursuer_2/loss=1.121, pursuer_3/loss=1.217, pursuer_4/loss=1.327, pursuer_5/loss=1.214, pursuer_6/loss=1.211, pursuer_7/loss=1.265, rew=261.86]                           


Epoch #110: test_reward: 277.152750 ± 160.272596, best_reward: 344.593333 ± 209.315791 in #70


Epoch #111: 20001it [01:05, 303.21it/s, env_step=2220000, len=2400, n/ep=0, n/st=400, pursuer_0/loss=1.361, pursuer_1/loss=1.313, pursuer_2/loss=1.439, pursuer_3/loss=1.470, pursuer_4/loss=1.393, pursuer_5/loss=1.342, pursuer_6/loss=1.255, pursuer_7/loss=1.276, rew=249.16]                           


Best Saved Rew 750
Epoch #111: test_reward: 374.474125 ± 149.183616, best_reward: 374.474125 ± 149.183616 in #111


Epoch #112: 20001it [01:07, 298.09it/s, env_step=2240000, len=1688, n/ep=0, n/st=400, pursuer_0/loss=1.132, pursuer_1/loss=1.236, pursuer_2/loss=1.186, pursuer_3/loss=1.405, pursuer_4/loss=1.232, pursuer_5/loss=1.370, pursuer_6/loss=1.352, pursuer_7/loss=1.389, rew=372.54]                           


Epoch #112: test_reward: 328.794375 ± 168.142556, best_reward: 374.474125 ± 149.183616 in #111


Epoch #113: 20001it [01:05, 304.35it/s, env_step=2260000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.087, pursuer_1/loss=1.176, pursuer_2/loss=1.239, pursuer_3/loss=1.330, pursuer_4/loss=1.205, pursuer_5/loss=1.268, pursuer_6/loss=1.312, pursuer_7/loss=1.259, rew=79.81]                            


Epoch #113: test_reward: 209.913917 ± 117.296447, best_reward: 374.474125 ± 149.183616 in #111


Epoch #114: 20001it [01:07, 296.38it/s, env_step=2280000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.208, pursuer_1/loss=1.182, pursuer_2/loss=1.271, pursuer_3/loss=1.326, pursuer_4/loss=1.235, pursuer_5/loss=1.286, pursuer_6/loss=1.266, pursuer_7/loss=1.325, rew=103.89]                           


Epoch #114: test_reward: 280.507667 ± 158.912859, best_reward: 374.474125 ± 149.183616 in #111


Epoch #115: 20001it [01:06, 301.22it/s, env_step=2300000, len=1976, n/ep=1, n/st=400, pursuer_0/loss=1.093, pursuer_1/loss=1.040, pursuer_2/loss=1.170, pursuer_3/loss=1.283, pursuer_4/loss=1.108, pursuer_5/loss=1.109, pursuer_6/loss=1.310, pursuer_7/loss=1.331, rew=472.84]                           


Epoch #115: test_reward: 190.983500 ± 187.938226, best_reward: 374.474125 ± 149.183616 in #111


Epoch #116: 20001it [01:06, 300.23it/s, env_step=2320000, len=2384, n/ep=0, n/st=400, pursuer_0/loss=1.235, pursuer_1/loss=1.184, pursuer_2/loss=1.303, pursuer_3/loss=1.308, pursuer_4/loss=1.191, pursuer_5/loss=1.228, pursuer_6/loss=1.360, pursuer_7/loss=1.434, rew=301.83]                           


Epoch #116: test_reward: 280.319958 ± 202.515217, best_reward: 374.474125 ± 149.183616 in #111


Epoch #117: 20001it [01:06, 301.26it/s, env_step=2340000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.147, pursuer_1/loss=1.258, pursuer_2/loss=1.279, pursuer_3/loss=1.437, pursuer_4/loss=1.271, pursuer_5/loss=1.247, pursuer_6/loss=1.349, pursuer_7/loss=1.359, rew=310.33]                           


Epoch #117: test_reward: 187.189625 ± 202.628989, best_reward: 374.474125 ± 149.183616 in #111


Epoch #118: 20001it [01:07, 297.62it/s, env_step=2360000, len=1824, n/ep=0, n/st=400, pursuer_0/loss=1.186, pursuer_1/loss=1.142, pursuer_2/loss=1.224, pursuer_3/loss=1.483, pursuer_4/loss=1.383, pursuer_5/loss=1.369, pursuer_6/loss=1.444, pursuer_7/loss=1.371, rew=308.06]                           


Epoch #118: test_reward: 166.372833 ± 151.078933, best_reward: 374.474125 ± 149.183616 in #111


Epoch #119: 20001it [01:06, 299.27it/s, env_step=2380000, len=1288, n/ep=0, n/st=400, pursuer_0/loss=1.322, pursuer_1/loss=1.252, pursuer_2/loss=1.395, pursuer_3/loss=1.366, pursuer_4/loss=1.365, pursuer_5/loss=1.329, pursuer_6/loss=1.448, pursuer_7/loss=1.530, rew=311.52]                           


Epoch #119: test_reward: 149.772000 ± 140.933017, best_reward: 374.474125 ± 149.183616 in #111


Epoch #120: 20001it [01:06, 301.16it/s, env_step=2400000, len=2728, n/ep=0, n/st=400, pursuer_0/loss=1.224, pursuer_1/loss=1.226, pursuer_2/loss=1.214, pursuer_3/loss=1.588, pursuer_4/loss=1.383, pursuer_5/loss=1.324, pursuer_6/loss=1.415, pursuer_7/loss=1.500, rew=336.85]                           


Epoch #120: test_reward: 209.715542 ± 126.373009, best_reward: 374.474125 ± 149.183616 in #111


Epoch #121: 20001it [01:06, 300.42it/s, env_step=2420000, len=2312, n/ep=0, n/st=400, pursuer_0/loss=1.374, pursuer_1/loss=1.406, pursuer_2/loss=1.374, pursuer_3/loss=1.751, pursuer_4/loss=1.449, pursuer_5/loss=1.415, pursuer_6/loss=1.429, pursuer_7/loss=1.489, rew=242.81]                           


Epoch #121: test_reward: 186.864958 ± 158.645898, best_reward: 374.474125 ± 149.183616 in #111


Epoch #122: 20001it [01:07, 295.25it/s, env_step=2440000, len=1648, n/ep=0, n/st=400, pursuer_0/loss=1.187, pursuer_1/loss=1.087, pursuer_2/loss=1.227, pursuer_3/loss=1.410, pursuer_4/loss=1.258, pursuer_5/loss=1.234, pursuer_6/loss=1.250, pursuer_7/loss=1.426, rew=380.47]                           


Epoch #122: test_reward: 288.822375 ± 170.586081, best_reward: 374.474125 ± 149.183616 in #111


Epoch #123: 20001it [01:06, 300.03it/s, env_step=2460000, len=2176, n/ep=0, n/st=400, pursuer_0/loss=1.202, pursuer_1/loss=1.190, pursuer_2/loss=1.244, pursuer_3/loss=1.351, pursuer_4/loss=1.261, pursuer_5/loss=1.343, pursuer_6/loss=1.402, pursuer_7/loss=1.441, rew=248.92]                           


Epoch #123: test_reward: 289.872833 ± 166.010066, best_reward: 374.474125 ± 149.183616 in #111


Epoch #124: 20001it [01:06, 300.99it/s, env_step=2480000, len=3108, n/ep=0, n/st=400, pursuer_0/loss=1.047, pursuer_1/loss=1.091, pursuer_2/loss=1.208, pursuer_3/loss=1.254, pursuer_4/loss=1.115, pursuer_5/loss=1.125, pursuer_6/loss=1.226, pursuer_7/loss=1.237, rew=361.00]                           


Epoch #124: test_reward: 213.329542 ± 117.943897, best_reward: 374.474125 ± 149.183616 in #111


Epoch #125: 20001it [01:06, 301.28it/s, env_step=2500000, len=3328, n/ep=0, n/st=400, pursuer_0/loss=1.151, pursuer_1/loss=1.235, pursuer_2/loss=1.374, pursuer_3/loss=1.502, pursuer_4/loss=1.338, pursuer_5/loss=1.338, pursuer_6/loss=1.201, pursuer_7/loss=1.599, rew=208.87]                           


Epoch #125: test_reward: 172.683042 ± 165.563668, best_reward: 374.474125 ± 149.183616 in #111


Epoch #126: 20001it [01:06, 300.14it/s, env_step=2520000, len=2592, n/ep=0, n/st=400, pursuer_0/loss=1.132, pursuer_1/loss=1.242, pursuer_2/loss=1.200, pursuer_3/loss=1.311, pursuer_4/loss=1.142, pursuer_5/loss=1.294, pursuer_6/loss=1.262, pursuer_7/loss=1.499, rew=245.55]                           


Epoch #126: test_reward: 183.402167 ± 146.197152, best_reward: 374.474125 ± 149.183616 in #111


Epoch #127: 20001it [01:06, 300.95it/s, env_step=2540000, len=1464, n/ep=1, n/st=400, pursuer_0/loss=1.222, pursuer_1/loss=1.128, pursuer_2/loss=1.270, pursuer_3/loss=1.393, pursuer_4/loss=1.222, pursuer_5/loss=1.224, pursuer_6/loss=1.307, pursuer_7/loss=1.392, rew=391.99]                           


Epoch #127: test_reward: 261.572792 ± 202.968384, best_reward: 374.474125 ± 149.183616 in #111


Epoch #128: 20001it [01:06, 299.60it/s, env_step=2560000, len=2212, n/ep=0, n/st=400, pursuer_0/loss=1.050, pursuer_1/loss=1.136, pursuer_2/loss=1.190, pursuer_3/loss=1.360, pursuer_4/loss=1.104, pursuer_5/loss=1.209, pursuer_6/loss=1.110, pursuer_7/loss=1.380, rew=325.14]                           


Epoch #128: test_reward: 225.184333 ± 169.301700, best_reward: 374.474125 ± 149.183616 in #111


Epoch #129: 20001it [01:06, 302.17it/s, env_step=2580000, len=3264, n/ep=0, n/st=400, pursuer_0/loss=1.089, pursuer_1/loss=1.157, pursuer_2/loss=1.222, pursuer_3/loss=1.448, pursuer_4/loss=1.191, pursuer_5/loss=1.228, pursuer_6/loss=1.109, pursuer_7/loss=1.356, rew=213.03]                           


Epoch #129: test_reward: 254.378875 ± 122.734624, best_reward: 374.474125 ± 149.183616 in #111


Epoch #130: 20001it [01:05, 306.02it/s, env_step=2600000, len=2784, n/ep=0, n/st=400, pursuer_0/loss=0.975, pursuer_1/loss=1.128, pursuer_2/loss=1.093, pursuer_3/loss=1.296, pursuer_4/loss=1.101, pursuer_5/loss=1.237, pursuer_6/loss=1.174, pursuer_7/loss=1.388, rew=318.51]                           


Epoch #130: test_reward: 321.498042 ± 185.015759, best_reward: 374.474125 ± 149.183616 in #111


Epoch #131: 20001it [01:06, 299.24it/s, env_step=2620000, len=1536, n/ep=1, n/st=400, pursuer_0/loss=1.142, pursuer_1/loss=1.214, pursuer_2/loss=1.301, pursuer_3/loss=1.300, pursuer_4/loss=1.290, pursuer_5/loss=1.269, pursuer_6/loss=1.288, pursuer_7/loss=1.388, rew=686.00]                           


Epoch #131: test_reward: 243.482583 ± 190.457001, best_reward: 374.474125 ± 149.183616 in #111


Epoch #132: 20001it [01:05, 304.21it/s, env_step=2640000, len=2440, n/ep=0, n/st=400, pursuer_0/loss=1.164, pursuer_1/loss=1.217, pursuer_2/loss=1.146, pursuer_3/loss=1.178, pursuer_4/loss=1.284, pursuer_5/loss=1.213, pursuer_6/loss=1.284, pursuer_7/loss=1.442, rew=470.50]                           


Epoch #132: test_reward: 218.337333 ± 150.982226, best_reward: 374.474125 ± 149.183616 in #111


Epoch #133: 20001it [01:05, 303.61it/s, env_step=2660000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.136, pursuer_1/loss=1.181, pursuer_2/loss=1.295, pursuer_3/loss=1.319, pursuer_4/loss=1.156, pursuer_5/loss=1.150, pursuer_6/loss=1.235, pursuer_7/loss=1.347, rew=78.64]                            


Epoch #133: test_reward: 125.399375 ± 171.523644, best_reward: 374.474125 ± 149.183616 in #111


Epoch #134: 20001it [01:06, 300.30it/s, env_step=2680000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.986, pursuer_1/loss=1.236, pursuer_2/loss=1.153, pursuer_3/loss=1.289, pursuer_4/loss=1.175, pursuer_5/loss=1.322, pursuer_6/loss=1.158, pursuer_7/loss=1.300, rew=234.35]                           


Steps Policy Saved  940
Epoch #134: test_reward: 296.283583 ± 190.340762, best_reward: 374.474125 ± 149.183616 in #111


Epoch #135: 20001it [01:05, 305.67it/s, env_step=2700000, len=3056, n/ep=1, n/st=400, pursuer_0/loss=1.135, pursuer_1/loss=1.242, pursuer_2/loss=1.306, pursuer_3/loss=1.405, pursuer_4/loss=1.395, pursuer_5/loss=1.246, pursuer_6/loss=1.278, pursuer_7/loss=1.450, rew=455.28]                           


Epoch #135: test_reward: 168.548750 ± 168.496035, best_reward: 374.474125 ± 149.183616 in #111


Epoch #136: 20001it [01:05, 303.28it/s, env_step=2720000, len=2352, n/ep=0, n/st=400, pursuer_0/loss=1.195, pursuer_1/loss=1.304, pursuer_2/loss=1.279, pursuer_3/loss=1.447, pursuer_4/loss=1.319, pursuer_5/loss=1.325, pursuer_6/loss=1.317, pursuer_7/loss=1.517, rew=288.88]                           


Epoch #136: test_reward: 226.449708 ± 166.982432, best_reward: 374.474125 ± 149.183616 in #111


Epoch #137: 20001it [01:05, 305.96it/s, env_step=2740000, len=3432, n/ep=0, n/st=400, pursuer_0/loss=1.036, pursuer_1/loss=1.166, pursuer_2/loss=1.241, pursuer_3/loss=1.261, pursuer_4/loss=1.128, pursuer_5/loss=1.365, pursuer_6/loss=1.221, pursuer_7/loss=1.408, rew=421.95]                           


Epoch #137: test_reward: 248.817167 ± 196.105373, best_reward: 374.474125 ± 149.183616 in #111


Epoch #138: 20001it [01:06, 302.89it/s, env_step=2760000, len=1248, n/ep=0, n/st=400, pursuer_0/loss=1.217, pursuer_1/loss=1.272, pursuer_2/loss=1.429, pursuer_3/loss=1.404, pursuer_4/loss=1.181, pursuer_5/loss=1.159, pursuer_6/loss=1.427, pursuer_7/loss=1.534, rew=440.71]                           


Epoch #138: test_reward: 232.422750 ± 163.576505, best_reward: 374.474125 ± 149.183616 in #111


Epoch #139: 20001it [01:05, 304.50it/s, env_step=2780000, len=1560, n/ep=0, n/st=400, pursuer_0/loss=1.236, pursuer_1/loss=1.343, pursuer_2/loss=1.410, pursuer_3/loss=1.558, pursuer_4/loss=1.383, pursuer_5/loss=1.372, pursuer_6/loss=1.316, pursuer_7/loss=1.588, rew=354.54]                           


Epoch #139: test_reward: 204.275167 ± 221.383743, best_reward: 374.474125 ± 149.183616 in #111


Epoch #140: 20001it [01:05, 303.11it/s, env_step=2800000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.067, pursuer_1/loss=1.161, pursuer_2/loss=1.194, pursuer_3/loss=1.348, pursuer_4/loss=1.123, pursuer_5/loss=1.221, pursuer_6/loss=1.173, pursuer_7/loss=1.333, rew=92.54]                            


Steps Policy Saved  990
Epoch #140: test_reward: 113.281375 ± 151.223206, best_reward: 374.474125 ± 149.183616 in #111


Epoch #141: 20001it [01:06, 301.61it/s, env_step=2820000, len=1344, n/ep=1, n/st=400, pursuer_0/loss=1.045, pursuer_1/loss=1.181, pursuer_2/loss=1.319, pursuer_3/loss=1.271, pursuer_4/loss=1.067, pursuer_5/loss=1.233, pursuer_6/loss=1.247, pursuer_7/loss=1.374, rew=337.90]                           


Epoch #141: test_reward: 140.066625 ± 155.306422, best_reward: 374.474125 ± 149.183616 in #111


Epoch #142: 20001it [01:06, 302.55it/s, env_step=2840000, len=2536, n/ep=0, n/st=400, pursuer_0/loss=1.255, pursuer_1/loss=1.240, pursuer_2/loss=1.478, pursuer_3/loss=1.383, pursuer_4/loss=1.270, pursuer_5/loss=1.335, pursuer_6/loss=1.371, pursuer_7/loss=1.378, rew=330.43]                           


Epoch #142: test_reward: 167.281042 ± 188.273481, best_reward: 374.474125 ± 149.183616 in #111


Epoch #143: 20001it [01:05, 305.13it/s, env_step=2860000, len=1344, n/ep=0, n/st=400, pursuer_0/loss=1.181, pursuer_1/loss=1.358, pursuer_2/loss=1.216, pursuer_3/loss=1.250, pursuer_4/loss=1.201, pursuer_5/loss=1.354, pursuer_6/loss=1.345, pursuer_7/loss=1.286, rew=461.11]                           


Epoch #143: test_reward: 291.453583 ± 190.136698, best_reward: 374.474125 ± 149.183616 in #111


Epoch #144: 20001it [01:03, 313.16it/s, env_step=2880000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.176, pursuer_1/loss=1.286, pursuer_2/loss=1.172, pursuer_3/loss=1.335, pursuer_4/loss=1.201, pursuer_5/loss=1.263, pursuer_6/loss=1.291, pursuer_7/loss=1.347, rew=96.73]                            


Epoch #144: test_reward: 267.982000 ± 207.727076, best_reward: 374.474125 ± 149.183616 in #111


Epoch #145: 20001it [01:03, 314.37it/s, env_step=2900000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.130, pursuer_1/loss=1.263, pursuer_2/loss=1.275, pursuer_3/loss=1.272, pursuer_4/loss=1.229, pursuer_5/loss=1.254, pursuer_6/loss=1.340, pursuer_7/loss=1.474, rew=225.54]                           


Epoch #145: test_reward: 157.276333 ± 93.843023, best_reward: 374.474125 ± 149.183616 in #111


Epoch #146: 20001it [01:03, 315.45it/s, env_step=2920000, len=1624, n/ep=0, n/st=400, pursuer_0/loss=1.211, pursuer_1/loss=1.216, pursuer_2/loss=1.158, pursuer_3/loss=1.267, pursuer_4/loss=1.106, pursuer_5/loss=1.387, pursuer_6/loss=1.284, pursuer_7/loss=1.334, rew=486.47]                           


Epoch #146: test_reward: 278.600917 ± 182.489811, best_reward: 374.474125 ± 149.183616 in #111


Epoch #147: 20001it [01:01, 323.55it/s, env_step=2940000, len=2224, n/ep=0, n/st=400, pursuer_0/loss=1.162, pursuer_1/loss=1.182, pursuer_2/loss=1.258, pursuer_3/loss=1.196, pursuer_4/loss=1.177, pursuer_5/loss=1.172, pursuer_6/loss=1.205, pursuer_7/loss=1.412, rew=486.56]                           


Epoch #147: test_reward: 358.806708 ± 208.358816, best_reward: 374.474125 ± 149.183616 in #111


Epoch #148: 20001it [01:01, 324.37it/s, env_step=2960000, len=1616, n/ep=1, n/st=400, pursuer_0/loss=1.115, pursuer_1/loss=1.253, pursuer_2/loss=1.216, pursuer_3/loss=1.262, pursuer_4/loss=1.161, pursuer_5/loss=1.228, pursuer_6/loss=1.308, pursuer_7/loss=1.361, rew=379.71]                           


Epoch #148: test_reward: 298.777167 ± 157.312969, best_reward: 374.474125 ± 149.183616 in #111


Epoch #149: 20001it [01:00, 330.50it/s, env_step=2980000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.048, pursuer_1/loss=1.222, pursuer_2/loss=1.183, pursuer_3/loss=1.153, pursuer_4/loss=1.182, pursuer_5/loss=1.248, pursuer_6/loss=1.337, pursuer_7/loss=1.305, rew=91.21]                            


Epoch #149: test_reward: 224.917708 ± 208.187385, best_reward: 374.474125 ± 149.183616 in #111


Epoch #150: 20001it [01:00, 332.33it/s, env_step=3000000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.063, pursuer_1/loss=1.140, pursuer_2/loss=1.133, pursuer_3/loss=1.325, pursuer_4/loss=1.201, pursuer_5/loss=1.253, pursuer_6/loss=1.276, pursuer_7/loss=1.329, rew=190.77]                           


Epoch #150: test_reward: 171.254750 ± 232.646247, best_reward: 374.474125 ± 149.183616 in #111


Epoch #151: 20001it [01:00, 331.08it/s, env_step=3020000, len=1848, n/ep=0, n/st=400, pursuer_0/loss=1.130, pursuer_1/loss=1.259, pursuer_2/loss=1.170, pursuer_3/loss=1.195, pursuer_4/loss=1.260, pursuer_5/loss=1.178, pursuer_6/loss=1.254, pursuer_7/loss=1.220, rew=494.22]                           


Epoch #151: test_reward: 198.438958 ± 189.512054, best_reward: 374.474125 ± 149.183616 in #111


Epoch #152: 20001it [00:59, 333.90it/s, env_step=3040000, len=3296, n/ep=1, n/st=400, pursuer_0/loss=1.266, pursuer_1/loss=1.204, pursuer_2/loss=1.224, pursuer_3/loss=1.372, pursuer_4/loss=1.356, pursuer_5/loss=1.267, pursuer_6/loss=1.395, pursuer_7/loss=1.402, rew=224.26]                           


Epoch #152: test_reward: 164.041500 ± 149.525513, best_reward: 374.474125 ± 149.183616 in #111


Epoch #153: 20001it [00:59, 336.38it/s, env_step=3060000, len=3728, n/ep=0, n/st=400, pursuer_0/loss=1.274, pursuer_1/loss=1.279, pursuer_2/loss=1.333, pursuer_3/loss=1.421, pursuer_4/loss=1.251, pursuer_5/loss=1.472, pursuer_6/loss=1.355, pursuer_7/loss=1.330, rew=147.32]                           


Epoch #153: test_reward: 256.996667 ± 217.219804, best_reward: 374.474125 ± 149.183616 in #111


Epoch #154: 20001it [00:58, 342.45it/s, env_step=3080000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.088, pursuer_1/loss=1.202, pursuer_2/loss=1.218, pursuer_3/loss=1.279, pursuer_4/loss=1.331, pursuer_5/loss=1.429, pursuer_6/loss=1.258, pursuer_7/loss=1.412, rew=99.68]                            


Epoch #154: test_reward: 256.324000 ± 195.922749, best_reward: 374.474125 ± 149.183616 in #111


Epoch #155: 20001it [01:00, 328.55it/s, env_step=3100000, len=2264, n/ep=0, n/st=400, pursuer_0/loss=1.258, pursuer_1/loss=1.125, pursuer_2/loss=1.238, pursuer_3/loss=1.151, pursuer_4/loss=1.161, pursuer_5/loss=1.165, pursuer_6/loss=1.272, pursuer_7/loss=1.278, rew=443.32]                           


Epoch #155: test_reward: 259.357167 ± 135.133345, best_reward: 374.474125 ± 149.183616 in #111


Epoch #156: 20001it [00:58, 340.48it/s, env_step=3120000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.092, pursuer_1/loss=1.065, pursuer_2/loss=1.143, pursuer_3/loss=1.218, pursuer_4/loss=1.268, pursuer_5/loss=1.117, pursuer_6/loss=1.179, pursuer_7/loss=1.249, rew=96.23]                            


Epoch #156: test_reward: 334.808375 ± 153.330004, best_reward: 374.474125 ± 149.183616 in #111


Epoch #157: 20001it [00:59, 338.58it/s, env_step=3140000, len=3800, n/ep=1, n/st=400, pursuer_0/loss=1.056, pursuer_1/loss=1.167, pursuer_2/loss=1.081, pursuer_3/loss=1.155, pursuer_4/loss=1.055, pursuer_5/loss=1.197, pursuer_6/loss=1.154, pursuer_7/loss=1.211, rew=384.26]                           


Epoch #157: test_reward: 257.251542 ± 174.243063, best_reward: 374.474125 ± 149.183616 in #111


Epoch #158: 20001it [00:59, 338.30it/s, env_step=3160000, len=1008, n/ep=0, n/st=400, pursuer_0/loss=1.100, pursuer_1/loss=1.143, pursuer_2/loss=1.073, pursuer_3/loss=1.161, pursuer_4/loss=1.147, pursuer_5/loss=1.206, pursuer_6/loss=1.287, pursuer_7/loss=1.262, rew=462.61]                           


Epoch #158: test_reward: 337.060792 ± 224.321560, best_reward: 374.474125 ± 149.183616 in #111


Epoch #159: 20001it [00:59, 333.76it/s, env_step=3180000, len=2048, n/ep=0, n/st=400, pursuer_0/loss=1.100, pursuer_1/loss=1.176, pursuer_2/loss=1.183, pursuer_3/loss=1.291, pursuer_4/loss=1.070, pursuer_5/loss=1.246, pursuer_6/loss=1.287, pursuer_7/loss=1.100, rew=254.54]                           


Epoch #159: test_reward: 150.953958 ± 201.183956, best_reward: 374.474125 ± 149.183616 in #111


Epoch #160: 20001it [00:58, 340.85it/s, env_step=3200000, len=2584, n/ep=0, n/st=400, pursuer_0/loss=1.059, pursuer_1/loss=0.983, pursuer_2/loss=1.145, pursuer_3/loss=1.128, pursuer_4/loss=1.167, pursuer_5/loss=1.104, pursuer_6/loss=1.110, pursuer_7/loss=1.173, rew=346.23]                           


Epoch #160: test_reward: 339.244167 ± 164.615489, best_reward: 374.474125 ± 149.183616 in #111


Epoch #161: 20001it [01:00, 328.57it/s, env_step=3220000, len=1928, n/ep=0, n/st=400, pursuer_0/loss=1.031, pursuer_1/loss=0.945, pursuer_2/loss=1.102, pursuer_3/loss=1.107, pursuer_4/loss=1.028, pursuer_5/loss=1.036, pursuer_6/loss=1.118, pursuer_7/loss=1.237, rew=431.72]                           


Epoch #161: test_reward: 245.596417 ± 181.961682, best_reward: 374.474125 ± 149.183616 in #111


Epoch #162: 20001it [00:59, 335.33it/s, env_step=3240000, len=1984, n/ep=0, n/st=400, pursuer_0/loss=1.103, pursuer_1/loss=1.110, pursuer_2/loss=1.211, pursuer_3/loss=1.345, pursuer_4/loss=1.229, pursuer_5/loss=1.143, pursuer_6/loss=1.232, pursuer_7/loss=1.308, rew=357.76]                           


Steps Policy Saved  1170
Epoch #162: test_reward: 249.274458 ± 143.511581, best_reward: 374.474125 ± 149.183616 in #111


Epoch #163: 20001it [01:00, 332.98it/s, env_step=3260000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.171, pursuer_1/loss=1.132, pursuer_2/loss=1.185, pursuer_3/loss=1.162, pursuer_4/loss=1.155, pursuer_5/loss=1.255, pursuer_6/loss=1.431, pursuer_7/loss=1.209, rew=410.11]                           


Steps Policy Saved  1180
Epoch #163: test_reward: 332.574083 ± 191.717573, best_reward: 374.474125 ± 149.183616 in #111


Epoch #164: 20001it [01:00, 329.28it/s, env_step=3280000, len=2464, n/ep=0, n/st=400, pursuer_0/loss=1.137, pursuer_1/loss=1.071, pursuer_2/loss=1.263, pursuer_3/loss=1.188, pursuer_4/loss=1.181, pursuer_5/loss=0.988, pursuer_6/loss=1.215, pursuer_7/loss=1.238, rew=216.06]                           


Epoch #164: test_reward: 300.140667 ± 155.179308, best_reward: 374.474125 ± 149.183616 in #111


Epoch #165: 20001it [01:00, 328.39it/s, env_step=3300000, len=1448, n/ep=1, n/st=400, pursuer_0/loss=1.088, pursuer_1/loss=1.178, pursuer_2/loss=1.458, pursuer_3/loss=1.231, pursuer_4/loss=1.235, pursuer_5/loss=1.231, pursuer_6/loss=1.376, pursuer_7/loss=1.463, rew=510.42]                           


Epoch #165: test_reward: 345.477625 ± 189.590840, best_reward: 374.474125 ± 149.183616 in #111


Epoch #166: 20001it [01:00, 331.92it/s, env_step=3320000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.101, pursuer_1/loss=1.046, pursuer_2/loss=1.148, pursuer_3/loss=1.135, pursuer_4/loss=1.123, pursuer_5/loss=1.179, pursuer_6/loss=1.280, pursuer_7/loss=1.197, rew=120.44]                           


Epoch #166: test_reward: 249.513333 ± 164.999775, best_reward: 374.474125 ± 149.183616 in #111


Epoch #167: 20001it [01:01, 324.23it/s, env_step=3340000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.089, pursuer_1/loss=1.143, pursuer_2/loss=1.305, pursuer_3/loss=1.165, pursuer_4/loss=1.170, pursuer_5/loss=1.143, pursuer_6/loss=1.295, pursuer_7/loss=1.325, rew=256.40]                           


Epoch #167: test_reward: 315.545792 ± 151.247639, best_reward: 374.474125 ± 149.183616 in #111


Epoch #168: 20001it [01:01, 322.97it/s, env_step=3360000, len=1408, n/ep=0, n/st=400, pursuer_0/loss=1.133, pursuer_1/loss=0.999, pursuer_2/loss=1.146, pursuer_3/loss=1.194, pursuer_4/loss=1.112, pursuer_5/loss=1.114, pursuer_6/loss=1.320, pursuer_7/loss=1.165, rew=664.90]                           


Epoch #168: test_reward: 182.537708 ± 170.265010, best_reward: 374.474125 ± 149.183616 in #111


Epoch #169: 20001it [01:02, 321.61it/s, env_step=3380000, len=1668, n/ep=0, n/st=400, pursuer_0/loss=1.120, pursuer_1/loss=1.083, pursuer_2/loss=1.190, pursuer_3/loss=1.088, pursuer_4/loss=1.156, pursuer_5/loss=1.107, pursuer_6/loss=1.214, pursuer_7/loss=1.221, rew=386.24]                           


Epoch #169: test_reward: 261.210250 ± 188.422457, best_reward: 374.474125 ± 149.183616 in #111


Epoch #170: 20001it [01:02, 321.98it/s, env_step=3400000, len=2896, n/ep=0, n/st=400, pursuer_0/loss=1.059, pursuer_1/loss=1.015, pursuer_2/loss=1.261, pursuer_3/loss=1.179, pursuer_4/loss=1.080, pursuer_5/loss=0.968, pursuer_6/loss=1.278, pursuer_7/loss=1.204, rew=255.64]                           


Epoch #170: test_reward: 308.953125 ± 217.664331, best_reward: 374.474125 ± 149.183616 in #111


Epoch #171: 20001it [01:02, 318.95it/s, env_step=3420000, len=1944, n/ep=1, n/st=400, pursuer_0/loss=1.009, pursuer_1/loss=1.056, pursuer_2/loss=1.091, pursuer_3/loss=1.040, pursuer_4/loss=1.115, pursuer_5/loss=1.005, pursuer_6/loss=1.269, pursuer_7/loss=1.166, rew=530.89]                           


Epoch #171: test_reward: 328.836125 ± 113.336792, best_reward: 374.474125 ± 149.183616 in #111


Epoch #172: 20001it [01:03, 316.96it/s, env_step=3440000, len=1808, n/ep=0, n/st=400, pursuer_0/loss=1.087, pursuer_1/loss=1.163, pursuer_2/loss=1.373, pursuer_3/loss=1.082, pursuer_4/loss=1.121, pursuer_5/loss=1.163, pursuer_6/loss=1.401, pursuer_7/loss=1.260, rew=407.96]                           


Epoch #172: test_reward: 274.614500 ± 144.749930, best_reward: 374.474125 ± 149.183616 in #111


Epoch #173: 20001it [01:03, 313.37it/s, env_step=3460000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.189, pursuer_1/loss=1.166, pursuer_2/loss=1.408, pursuer_3/loss=1.330, pursuer_4/loss=1.243, pursuer_5/loss=1.348, pursuer_6/loss=1.383, pursuer_7/loss=1.165, rew=124.91]                           


Epoch #173: test_reward: 312.685417 ± 161.528426, best_reward: 374.474125 ± 149.183616 in #111


Epoch #174: 20001it [01:04, 312.28it/s, env_step=3480000, len=2032, n/ep=0, n/st=400, pursuer_0/loss=1.117, pursuer_1/loss=1.243, pursuer_2/loss=1.265, pursuer_3/loss=1.238, pursuer_4/loss=1.115, pursuer_5/loss=1.206, pursuer_6/loss=1.218, pursuer_7/loss=1.251, rew=412.11]                           


Steps Policy Saved  1270
Epoch #174: test_reward: 274.271833 ± 174.408991, best_reward: 374.474125 ± 149.183616 in #111


Epoch #175: 20001it [01:04, 312.06it/s, env_step=3500000, len=1456, n/ep=0, n/st=400, pursuer_0/loss=1.179, pursuer_1/loss=1.212, pursuer_2/loss=1.316, pursuer_3/loss=1.343, pursuer_4/loss=1.202, pursuer_5/loss=1.298, pursuer_6/loss=1.284, pursuer_7/loss=1.234, rew=459.77]                           


Epoch #175: test_reward: 244.212625 ± 205.328701, best_reward: 374.474125 ± 149.183616 in #111


Epoch #176: 20001it [01:05, 306.45it/s, env_step=3520000, len=2184, n/ep=0, n/st=400, pursuer_0/loss=1.189, pursuer_1/loss=1.331, pursuer_2/loss=1.361, pursuer_3/loss=1.214, pursuer_4/loss=1.290, pursuer_5/loss=1.067, pursuer_6/loss=1.504, pursuer_7/loss=1.282, rew=421.66]                           


Epoch #176: test_reward: 204.642042 ± 162.152700, best_reward: 374.474125 ± 149.183616 in #111


Epoch #177: 20001it [01:06, 300.13it/s, env_step=3540000, len=1976, n/ep=0, n/st=400, pursuer_0/loss=1.146, pursuer_1/loss=1.319, pursuer_2/loss=1.263, pursuer_3/loss=1.142, pursuer_4/loss=1.138, pursuer_5/loss=1.046, pursuer_6/loss=1.390, pursuer_7/loss=1.323, rew=518.29]                           


Epoch #177: test_reward: 279.162625 ± 219.857441, best_reward: 374.474125 ± 149.183616 in #111


Epoch #178: 20001it [01:07, 296.18it/s, env_step=3560000, len=2444, n/ep=0, n/st=400, pursuer_0/loss=1.038, pursuer_1/loss=1.061, pursuer_2/loss=1.256, pursuer_3/loss=1.200, pursuer_4/loss=1.205, pursuer_5/loss=1.228, pursuer_6/loss=1.099, pursuer_7/loss=1.237, rew=397.89]                           


Epoch #178: test_reward: 189.175792 ± 157.772150, best_reward: 374.474125 ± 149.183616 in #111


Epoch #179: 20001it [01:06, 300.05it/s, env_step=3580000, len=2656, n/ep=0, n/st=400, pursuer_0/loss=1.226, pursuer_1/loss=1.210, pursuer_2/loss=1.326, pursuer_3/loss=1.315, pursuer_4/loss=1.297, pursuer_5/loss=1.230, pursuer_6/loss=1.231, pursuer_7/loss=1.449, rew=255.94]                           


Steps Policy Saved  1310
Epoch #179: test_reward: 245.047583 ± 153.267564, best_reward: 374.474125 ± 149.183616 in #111


Epoch #180: 20001it [01:07, 298.17it/s, env_step=3600000, len=3856, n/ep=0, n/st=400, pursuer_0/loss=1.231, pursuer_1/loss=1.195, pursuer_2/loss=1.172, pursuer_3/loss=1.202, pursuer_4/loss=1.140, pursuer_5/loss=1.255, pursuer_6/loss=1.250, pursuer_7/loss=1.263, rew=161.72]                           


Epoch #180: test_reward: 282.347625 ± 180.906621, best_reward: 374.474125 ± 149.183616 in #111


Epoch #181: 20001it [01:08, 293.05it/s, env_step=3620000, len=1848, n/ep=1, n/st=400, pursuer_0/loss=1.020, pursuer_1/loss=1.117, pursuer_2/loss=1.201, pursuer_3/loss=1.219, pursuer_4/loss=1.256, pursuer_5/loss=1.243, pursuer_6/loss=1.319, pursuer_7/loss=1.410, rew=369.12]                           


Epoch #181: test_reward: 181.803792 ± 141.856135, best_reward: 374.474125 ± 149.183616 in #111


Epoch #182: 20001it [01:06, 301.61it/s, env_step=3640000, len=2736, n/ep=0, n/st=400, pursuer_0/loss=1.166, pursuer_1/loss=1.184, pursuer_2/loss=1.329, pursuer_3/loss=1.261, pursuer_4/loss=1.252, pursuer_5/loss=1.275, pursuer_6/loss=1.371, pursuer_7/loss=1.356, rew=197.28]                           


Epoch #182: test_reward: 221.692750 ± 140.384486, best_reward: 374.474125 ± 149.183616 in #111


Epoch #183: 20001it [01:06, 301.41it/s, env_step=3660000, len=1608, n/ep=1, n/st=400, pursuer_0/loss=1.226, pursuer_1/loss=1.261, pursuer_2/loss=1.216, pursuer_3/loss=1.261, pursuer_4/loss=1.161, pursuer_5/loss=1.345, pursuer_6/loss=1.320, pursuer_7/loss=1.428, rew=560.31]                           


Epoch #183: test_reward: 312.113167 ± 191.624765, best_reward: 374.474125 ± 149.183616 in #111


Epoch #184: 20001it [01:06, 301.29it/s, env_step=3680000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.159, pursuer_1/loss=1.243, pursuer_2/loss=1.205, pursuer_3/loss=1.275, pursuer_4/loss=1.143, pursuer_5/loss=1.218, pursuer_6/loss=1.273, pursuer_7/loss=1.355, rew=315.15]                           


Epoch #184: test_reward: 196.806750 ± 150.546310, best_reward: 374.474125 ± 149.183616 in #111


Epoch #185: 20001it [01:07, 298.16it/s, env_step=3700000, len=3280, n/ep=0, n/st=400, pursuer_0/loss=1.117, pursuer_1/loss=1.241, pursuer_2/loss=1.248, pursuer_3/loss=1.358, pursuer_4/loss=1.196, pursuer_5/loss=1.312, pursuer_6/loss=1.454, pursuer_7/loss=1.387, rew=308.09]                           


Steps Policy Saved  1360
Epoch #185: test_reward: 262.462083 ± 181.262375, best_reward: 374.474125 ± 149.183616 in #111


Epoch #186: 20001it [01:06, 301.42it/s, env_step=3720000, len=1272, n/ep=1, n/st=400, pursuer_0/loss=1.224, pursuer_1/loss=1.106, pursuer_2/loss=1.275, pursuer_3/loss=1.257, pursuer_4/loss=1.162, pursuer_5/loss=1.197, pursuer_6/loss=1.268, pursuer_7/loss=1.292, rew=457.50]                           


Epoch #186: test_reward: 297.390583 ± 201.580198, best_reward: 374.474125 ± 149.183616 in #111


Epoch #187: 20001it [01:07, 298.31it/s, env_step=3740000, len=2528, n/ep=0, n/st=400, pursuer_0/loss=1.050, pursuer_1/loss=1.197, pursuer_2/loss=1.212, pursuer_3/loss=1.222, pursuer_4/loss=1.097, pursuer_5/loss=1.115, pursuer_6/loss=1.270, pursuer_7/loss=1.398, rew=286.86]                           


Epoch #187: test_reward: 262.099250 ± 143.453934, best_reward: 374.474125 ± 149.183616 in #111


Epoch #188: 20001it [01:08, 293.92it/s, env_step=3760000, len=1632, n/ep=0, n/st=400, pursuer_0/loss=1.157, pursuer_1/loss=1.256, pursuer_2/loss=1.272, pursuer_3/loss=1.425, pursuer_4/loss=1.280, pursuer_5/loss=1.203, pursuer_6/loss=1.176, pursuer_7/loss=1.448, rew=311.64]                           


Epoch #188: test_reward: 201.219375 ± 146.297254, best_reward: 374.474125 ± 149.183616 in #111


Epoch #189: 20001it [01:07, 298.42it/s, env_step=3780000, len=1384, n/ep=0, n/st=400, pursuer_0/loss=1.234, pursuer_1/loss=1.279, pursuer_2/loss=1.374, pursuer_3/loss=1.448, pursuer_4/loss=1.239, pursuer_5/loss=1.331, pursuer_6/loss=1.420, pursuer_7/loss=1.491, rew=661.57]                           


Epoch #189: test_reward: 255.816917 ± 192.166375, best_reward: 374.474125 ± 149.183616 in #111


Epoch #190: 20001it [01:06, 299.44it/s, env_step=3800000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.232, pursuer_1/loss=1.386, pursuer_2/loss=1.328, pursuer_3/loss=1.289, pursuer_4/loss=1.250, pursuer_5/loss=1.255, pursuer_6/loss=1.430, pursuer_7/loss=1.389, rew=114.95]                           


Epoch #190: test_reward: 220.096667 ± 132.624664, best_reward: 374.474125 ± 149.183616 in #111


Epoch #191: 20001it [01:06, 300.42it/s, env_step=3820000, len=1984, n/ep=0, n/st=400, pursuer_0/loss=1.149, pursuer_1/loss=1.297, pursuer_2/loss=1.252, pursuer_3/loss=1.310, pursuer_4/loss=1.030, pursuer_5/loss=1.227, pursuer_6/loss=1.292, pursuer_7/loss=1.325, rew=456.22]                           


Epoch #191: test_reward: 273.335667 ± 155.082814, best_reward: 374.474125 ± 149.183616 in #111


Epoch #192: 20001it [01:06, 299.56it/s, env_step=3840000, len=1952, n/ep=0, n/st=400, pursuer_0/loss=1.048, pursuer_1/loss=1.293, pursuer_2/loss=1.530, pursuer_3/loss=1.272, pursuer_4/loss=1.260, pursuer_5/loss=1.328, pursuer_6/loss=1.138, pursuer_7/loss=1.366, rew=388.35]                           


Epoch #192: test_reward: 298.844125 ± 162.392324, best_reward: 374.474125 ± 149.183616 in #111


Epoch #193: 20001it [01:06, 301.15it/s, env_step=3860000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.081, pursuer_1/loss=1.116, pursuer_2/loss=1.153, pursuer_3/loss=1.163, pursuer_4/loss=1.155, pursuer_5/loss=1.222, pursuer_6/loss=1.276, pursuer_7/loss=1.253, rew=268.62]                           


Epoch #193: test_reward: 309.621083 ± 189.982505, best_reward: 374.474125 ± 149.183616 in #111


Epoch #194: 20001it [01:05, 304.06it/s, env_step=3880000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.193, pursuer_1/loss=1.390, pursuer_2/loss=1.239, pursuer_3/loss=1.348, pursuer_4/loss=1.183, pursuer_5/loss=1.290, pursuer_6/loss=1.419, pursuer_7/loss=1.536, rew=20.41]                            


Epoch #194: test_reward: 204.826958 ± 143.155727, best_reward: 374.474125 ± 149.183616 in #111


Epoch #195: 20001it [01:05, 306.64it/s, env_step=3900000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.122, pursuer_1/loss=1.290, pursuer_2/loss=1.246, pursuer_3/loss=1.322, pursuer_4/loss=1.187, pursuer_5/loss=1.209, pursuer_6/loss=1.212, pursuer_7/loss=1.314, rew=145.75]                           


Epoch #195: test_reward: 274.934458 ± 202.607333, best_reward: 374.474125 ± 149.183616 in #111


Epoch #196: 20001it [01:05, 306.47it/s, env_step=3920000, len=1352, n/ep=0, n/st=400, pursuer_0/loss=1.101, pursuer_1/loss=1.142, pursuer_2/loss=1.160, pursuer_3/loss=1.435, pursuer_4/loss=1.160, pursuer_5/loss=1.211, pursuer_6/loss=1.294, pursuer_7/loss=1.311, rew=444.62]                           


Epoch #196: test_reward: 243.457083 ± 151.841432, best_reward: 374.474125 ± 149.183616 in #111


Epoch #197: 20001it [01:06, 302.61it/s, env_step=3940000, len=1560, n/ep=0, n/st=400, pursuer_0/loss=0.940, pursuer_1/loss=1.109, pursuer_2/loss=1.075, pursuer_3/loss=1.111, pursuer_4/loss=1.097, pursuer_5/loss=1.159, pursuer_6/loss=1.156, pursuer_7/loss=1.257, rew=514.07]                           


Epoch #197: test_reward: 254.898542 ± 167.536556, best_reward: 374.474125 ± 149.183616 in #111


Epoch #198: 20001it [01:06, 300.75it/s, env_step=3960000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.988, pursuer_1/loss=1.067, pursuer_2/loss=1.095, pursuer_3/loss=1.196, pursuer_4/loss=0.982, pursuer_5/loss=1.062, pursuer_6/loss=1.053, pursuer_7/loss=1.297, rew=218.78]                           


Epoch #198: test_reward: 202.828458 ± 143.421099, best_reward: 374.474125 ± 149.183616 in #111


Epoch #199: 20001it [01:05, 306.40it/s, env_step=3980000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.135, pursuer_1/loss=1.179, pursuer_2/loss=1.055, pursuer_3/loss=1.390, pursuer_4/loss=1.082, pursuer_5/loss=1.182, pursuer_6/loss=1.161, pursuer_7/loss=1.408, rew=24.46]                            


Steps Policy Saved  1470
Epoch #199: test_reward: 261.630792 ± 151.476253, best_reward: 374.474125 ± 149.183616 in #111


Epoch #200: 20001it [01:06, 300.60it/s, env_step=4000000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.956, pursuer_1/loss=1.006, pursuer_2/loss=1.080, pursuer_3/loss=1.169, pursuer_4/loss=1.094, pursuer_5/loss=1.116, pursuer_6/loss=1.031, pursuer_7/loss=1.105, rew=230.06]                           


Epoch #200: test_reward: 282.157500 ± 180.997776, best_reward: 374.474125 ± 149.183616 in #111


Epoch #201: 20001it [01:07, 297.27it/s, env_step=4020000, len=2272, n/ep=0, n/st=400, pursuer_0/loss=0.967, pursuer_1/loss=1.206, pursuer_2/loss=1.127, pursuer_3/loss=1.099, pursuer_4/loss=1.048, pursuer_5/loss=1.209, pursuer_6/loss=1.207, pursuer_7/loss=1.141, rew=356.58]                           


Epoch #201: test_reward: 301.957625 ± 233.028870, best_reward: 374.474125 ± 149.183616 in #111


Epoch #202: 20001it [01:07, 295.16it/s, env_step=4040000, len=2452, n/ep=0, n/st=400, pursuer_0/loss=1.028, pursuer_1/loss=1.130, pursuer_2/loss=1.076, pursuer_3/loss=1.165, pursuer_4/loss=1.072, pursuer_5/loss=1.231, pursuer_6/loss=1.218, pursuer_7/loss=1.236, rew=502.15]                           


Epoch #202: test_reward: 185.903958 ± 150.018140, best_reward: 374.474125 ± 149.183616 in #111


Epoch #203: 20001it [01:06, 298.63it/s, env_step=4060000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.948, pursuer_1/loss=0.929, pursuer_2/loss=1.137, pursuer_3/loss=1.204, pursuer_4/loss=0.996, pursuer_5/loss=1.003, pursuer_6/loss=1.089, pursuer_7/loss=1.277, rew=-47.29]                           


Epoch #203: test_reward: 189.302208 ± 190.387003, best_reward: 374.474125 ± 149.183616 in #111


Epoch #204: 20001it [01:04, 312.05it/s, env_step=4080000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.970, pursuer_1/loss=1.026, pursuer_2/loss=1.206, pursuer_3/loss=1.048, pursuer_4/loss=1.109, pursuer_5/loss=1.140, pursuer_6/loss=1.273, pursuer_7/loss=1.322, rew=187.25]                           


Epoch #204: test_reward: 307.907667 ± 165.327027, best_reward: 374.474125 ± 149.183616 in #111


Epoch #205: 20001it [01:05, 306.27it/s, env_step=4100000, len=1304, n/ep=0, n/st=400, pursuer_0/loss=1.005, pursuer_1/loss=1.111, pursuer_2/loss=1.032, pursuer_3/loss=1.130, pursuer_4/loss=0.909, pursuer_5/loss=1.042, pursuer_6/loss=1.073, pursuer_7/loss=1.208, rew=624.41]                           


Epoch #205: test_reward: 278.013792 ± 213.140926, best_reward: 374.474125 ± 149.183616 in #111


Epoch #206: 20001it [01:01, 324.78it/s, env_step=4120000, len=2988, n/ep=0, n/st=400, pursuer_0/loss=1.030, pursuer_1/loss=1.059, pursuer_2/loss=1.104, pursuer_3/loss=1.149, pursuer_4/loss=1.027, pursuer_5/loss=0.992, pursuer_6/loss=1.022, pursuer_7/loss=1.219, rew=378.68]                           


Steps Policy Saved  1520
Epoch #206: test_reward: 246.538083 ± 190.218300, best_reward: 374.474125 ± 149.183616 in #111


Epoch #207: 20001it [01:03, 317.16it/s, env_step=4140000, len=3608, n/ep=0, n/st=400, pursuer_0/loss=0.833, pursuer_1/loss=0.920, pursuer_2/loss=1.040, pursuer_3/loss=0.990, pursuer_4/loss=0.950, pursuer_5/loss=0.902, pursuer_6/loss=1.081, pursuer_7/loss=1.179, rew=120.07]                           


Epoch #207: test_reward: 324.747042 ± 174.380167, best_reward: 374.474125 ± 149.183616 in #111


Epoch #208: 20001it [01:03, 315.34it/s, env_step=4160000, len=2080, n/ep=0, n/st=400, pursuer_0/loss=0.998, pursuer_1/loss=1.171, pursuer_2/loss=1.082, pursuer_3/loss=1.123, pursuer_4/loss=1.069, pursuer_5/loss=1.065, pursuer_6/loss=1.088, pursuer_7/loss=1.302, rew=370.62]                           


Epoch #208: test_reward: 285.625042 ± 182.579714, best_reward: 374.474125 ± 149.183616 in #111


Epoch #209: 20001it [01:03, 312.84it/s, env_step=4180000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.910, pursuer_1/loss=0.899, pursuer_2/loss=1.093, pursuer_3/loss=1.026, pursuer_4/loss=0.993, pursuer_5/loss=0.955, pursuer_6/loss=1.087, pursuer_7/loss=1.331, rew=199.77]                           


Epoch #209: test_reward: 209.053250 ± 155.604623, best_reward: 374.474125 ± 149.183616 in #111


Epoch #210: 20001it [01:06, 302.71it/s, env_step=4200000, len=2476, n/ep=0, n/st=400, pursuer_0/loss=0.958, pursuer_1/loss=1.074, pursuer_2/loss=1.095, pursuer_3/loss=1.099, pursuer_4/loss=1.014, pursuer_5/loss=1.104, pursuer_6/loss=1.103, pursuer_7/loss=1.320, rew=414.11]                           


Epoch #210: test_reward: 350.772500 ± 196.185264, best_reward: 374.474125 ± 149.183616 in #111


Epoch #211: 20001it [01:06, 300.31it/s, env_step=4220000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.912, pursuer_1/loss=1.050, pursuer_2/loss=1.035, pursuer_3/loss=1.169, pursuer_4/loss=0.984, pursuer_5/loss=1.155, pursuer_6/loss=0.949, pursuer_7/loss=1.163, rew=339.58]                           


Epoch #211: test_reward: 233.053667 ± 220.906933, best_reward: 374.474125 ± 149.183616 in #111


Epoch #212: 20001it [01:06, 301.64it/s, env_step=4240000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.937, pursuer_1/loss=1.044, pursuer_2/loss=1.008, pursuer_3/loss=1.134, pursuer_4/loss=0.930, pursuer_5/loss=1.029, pursuer_6/loss=1.066, pursuer_7/loss=1.162, rew=179.90]                           


Epoch #212: test_reward: 294.071792 ± 186.828923, best_reward: 374.474125 ± 149.183616 in #111


Epoch #213: 20001it [01:06, 301.52it/s, env_step=4260000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.956, pursuer_1/loss=1.073, pursuer_2/loss=1.089, pursuer_3/loss=1.214, pursuer_4/loss=0.961, pursuer_5/loss=1.075, pursuer_6/loss=1.118, pursuer_7/loss=1.253, rew=63.15]                            


Epoch #213: test_reward: 294.427917 ± 214.161784, best_reward: 374.474125 ± 149.183616 in #111


Epoch #214: 20001it [01:07, 298.50it/s, env_step=4280000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.890, pursuer_1/loss=1.014, pursuer_2/loss=1.037, pursuer_3/loss=1.191, pursuer_4/loss=0.981, pursuer_5/loss=1.030, pursuer_6/loss=1.127, pursuer_7/loss=1.158, rew=314.58]                           


Steps Policy Saved  1580
Epoch #214: test_reward: 305.175333 ± 167.254490, best_reward: 374.474125 ± 149.183616 in #111


Epoch #215: 20001it [01:06, 299.96it/s, env_step=4300000, len=1144, n/ep=0, n/st=400, pursuer_0/loss=0.913, pursuer_1/loss=1.050, pursuer_2/loss=1.035, pursuer_3/loss=1.163, pursuer_4/loss=0.917, pursuer_5/loss=1.048, pursuer_6/loss=1.042, pursuer_7/loss=1.235, rew=483.71]                           


Steps Policy Saved  1590
Epoch #215: test_reward: 352.930542 ± 193.351543, best_reward: 374.474125 ± 149.183616 in #111


Epoch #216: 20001it [01:06, 301.24it/s, env_step=4320000, len=3336, n/ep=0, n/st=400, pursuer_0/loss=1.168, pursuer_1/loss=1.145, pursuer_2/loss=1.228, pursuer_3/loss=1.231, pursuer_4/loss=1.095, pursuer_5/loss=1.146, pursuer_6/loss=1.144, pursuer_7/loss=1.202, rew=246.64]                           


Epoch #216: test_reward: 352.340042 ± 166.118775, best_reward: 374.474125 ± 149.183616 in #111


Epoch #217: 20001it [01:06, 302.24it/s, env_step=4340000, len=3920, n/ep=0, n/st=400, pursuer_0/loss=0.974, pursuer_1/loss=1.094, pursuer_2/loss=1.154, pursuer_3/loss=1.209, pursuer_4/loss=1.014, pursuer_5/loss=1.092, pursuer_6/loss=1.098, pursuer_7/loss=1.267, rew=324.09]                           


Epoch #217: test_reward: 274.050708 ± 191.272761, best_reward: 374.474125 ± 149.183616 in #111


Epoch #218: 20001it [01:06, 302.01it/s, env_step=4360000, len=3552, n/ep=1, n/st=400, pursuer_0/loss=1.008, pursuer_1/loss=1.026, pursuer_2/loss=1.056, pursuer_3/loss=1.077, pursuer_4/loss=1.028, pursuer_5/loss=1.245, pursuer_6/loss=1.134, pursuer_7/loss=1.176, rew=129.64]                           


Epoch #218: test_reward: 260.826667 ± 217.708274, best_reward: 374.474125 ± 149.183616 in #111


Epoch #219: 20001it [01:07, 295.06it/s, env_step=4380000, len=2104, n/ep=0, n/st=400, pursuer_0/loss=1.057, pursuer_1/loss=1.130, pursuer_2/loss=1.069, pursuer_3/loss=1.137, pursuer_4/loss=1.159, pursuer_5/loss=1.190, pursuer_6/loss=1.118, pursuer_7/loss=1.242, rew=317.75]                           


Epoch #219: test_reward: 338.971792 ± 163.307826, best_reward: 374.474125 ± 149.183616 in #111


Epoch #220: 20001it [01:03, 312.72it/s, env_step=4400000, len=3888, n/ep=1, n/st=400, pursuer_0/loss=1.054, pursuer_1/loss=1.081, pursuer_2/loss=1.142, pursuer_3/loss=1.069, pursuer_4/loss=1.041, pursuer_5/loss=1.097, pursuer_6/loss=1.174, pursuer_7/loss=1.245, rew=193.14]                           


Epoch #220: test_reward: 317.243792 ± 210.229255, best_reward: 374.474125 ± 149.183616 in #111


Epoch #221: 20001it [01:03, 314.21it/s, env_step=4420000, len=904, n/ep=0, n/st=400, pursuer_0/loss=1.151, pursuer_1/loss=1.238, pursuer_2/loss=1.157, pursuer_3/loss=1.201, pursuer_4/loss=1.099, pursuer_5/loss=1.388, pursuer_6/loss=1.282, pursuer_7/loss=1.321, rew=718.61]                            


Epoch #221: test_reward: 307.093208 ± 194.717396, best_reward: 374.474125 ± 149.183616 in #111


Epoch #222: 20001it [01:02, 317.62it/s, env_step=4440000, len=2408, n/ep=0, n/st=400, pursuer_0/loss=1.057, pursuer_1/loss=1.237, pursuer_2/loss=1.112, pursuer_3/loss=1.174, pursuer_4/loss=1.196, pursuer_5/loss=1.165, pursuer_6/loss=1.278, pursuer_7/loss=1.240, rew=398.34]                           


Epoch #222: test_reward: 353.751167 ± 159.634855, best_reward: 374.474125 ± 149.183616 in #111


Epoch #223: 20001it [01:02, 320.92it/s, env_step=4460000, len=3776, n/ep=0, n/st=400, pursuer_0/loss=1.092, pursuer_1/loss=1.161, pursuer_2/loss=1.183, pursuer_3/loss=1.392, pursuer_4/loss=1.191, pursuer_5/loss=1.338, pursuer_6/loss=1.240, pursuer_7/loss=1.330, rew=346.73]                           


Epoch #223: test_reward: 297.644292 ± 212.254208, best_reward: 374.474125 ± 149.183616 in #111


Epoch #224: 20001it [01:02, 322.09it/s, env_step=4480000, len=1440, n/ep=0, n/st=400, pursuer_0/loss=1.106, pursuer_1/loss=1.182, pursuer_2/loss=1.251, pursuer_3/loss=1.219, pursuer_4/loss=1.126, pursuer_5/loss=1.233, pursuer_6/loss=1.216, pursuer_7/loss=1.251, rew=516.76]                           


Best Saved Rew 1673
Epoch #224: test_reward: 398.481917 ± 195.315630, best_reward: 398.481917 ± 195.315630 in #224


Epoch #225: 20001it [01:02, 317.61it/s, env_step=4500000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.249, pursuer_1/loss=1.287, pursuer_2/loss=1.236, pursuer_3/loss=1.330, pursuer_4/loss=1.112, pursuer_5/loss=1.399, pursuer_6/loss=1.307, pursuer_7/loss=1.306, rew=364.85]                           


Epoch #225: test_reward: 337.622708 ± 180.977435, best_reward: 398.481917 ± 195.315630 in #224


Epoch #226: 20001it [01:06, 300.56it/s, env_step=4520000, len=2864, n/ep=0, n/st=400, pursuer_0/loss=1.181, pursuer_1/loss=1.418, pursuer_2/loss=1.291, pursuer_3/loss=1.291, pursuer_4/loss=1.254, pursuer_5/loss=1.326, pursuer_6/loss=1.267, pursuer_7/loss=1.358, rew=382.62]                           


Steps Policy Saved  1690
Epoch #226: test_reward: 288.294292 ± 180.678084, best_reward: 398.481917 ± 195.315630 in #224


Epoch #227: 20001it [01:01, 322.64it/s, env_step=4540000, len=1200, n/ep=1, n/st=400, pursuer_0/loss=1.168, pursuer_1/loss=1.359, pursuer_2/loss=1.411, pursuer_3/loss=1.374, pursuer_4/loss=1.316, pursuer_5/loss=1.278, pursuer_6/loss=1.351, pursuer_7/loss=1.374, rew=633.77]                           


Epoch #227: test_reward: 371.593708 ± 190.700677, best_reward: 398.481917 ± 195.315630 in #224


Epoch #228: 20001it [01:03, 315.51it/s, env_step=4560000, len=1144, n/ep=0, n/st=400, pursuer_0/loss=1.208, pursuer_1/loss=1.297, pursuer_2/loss=1.337, pursuer_3/loss=1.449, pursuer_4/loss=1.355, pursuer_5/loss=1.333, pursuer_6/loss=1.372, pursuer_7/loss=1.309, rew=541.08]                           


Epoch #228: test_reward: 350.564667 ± 198.863267, best_reward: 398.481917 ± 195.315630 in #224


Epoch #229: 20001it [01:02, 319.39it/s, env_step=4580000, len=1728, n/ep=0, n/st=400, pursuer_0/loss=1.225, pursuer_1/loss=1.390, pursuer_2/loss=1.347, pursuer_3/loss=1.357, pursuer_4/loss=1.409, pursuer_5/loss=1.517, pursuer_6/loss=1.312, pursuer_7/loss=1.430, rew=357.62]                           


Epoch #229: test_reward: 244.951208 ± 196.478549, best_reward: 398.481917 ± 195.315630 in #224


Epoch #230: 20001it [01:02, 322.09it/s, env_step=4600000, len=3704, n/ep=0, n/st=400, pursuer_0/loss=1.298, pursuer_1/loss=1.293, pursuer_2/loss=1.281, pursuer_3/loss=1.262, pursuer_4/loss=1.255, pursuer_5/loss=1.449, pursuer_6/loss=1.243, pursuer_7/loss=1.327, rew=113.29]                           


Epoch #230: test_reward: 396.293750 ± 194.245883, best_reward: 398.481917 ± 195.315630 in #224


Epoch #231: 20001it [01:01, 326.53it/s, env_step=4620000, len=1488, n/ep=0, n/st=400, pursuer_0/loss=1.094, pursuer_1/loss=1.339, pursuer_2/loss=1.330, pursuer_3/loss=1.505, pursuer_4/loss=1.318, pursuer_5/loss=1.406, pursuer_6/loss=1.327, pursuer_7/loss=1.485, rew=439.48]                           


Epoch #231: test_reward: 262.311417 ± 222.101125, best_reward: 398.481917 ± 195.315630 in #224


Epoch #232: 20001it [01:04, 308.04it/s, env_step=4640000, len=1360, n/ep=0, n/st=400, pursuer_0/loss=1.172, pursuer_1/loss=1.339, pursuer_2/loss=1.323, pursuer_3/loss=1.300, pursuer_4/loss=1.131, pursuer_5/loss=1.363, pursuer_6/loss=1.168, pursuer_7/loss=1.435, rew=539.27]                           


Epoch #232: test_reward: 299.975292 ± 232.314738, best_reward: 398.481917 ± 195.315630 in #224


Epoch #233: 20001it [01:03, 314.01it/s, env_step=4660000, len=1432, n/ep=0, n/st=400, pursuer_0/loss=1.253, pursuer_1/loss=1.505, pursuer_2/loss=1.494, pursuer_3/loss=1.331, pursuer_4/loss=1.299, pursuer_5/loss=1.399, pursuer_6/loss=1.409, pursuer_7/loss=1.425, rew=600.61]                           


Epoch #233: test_reward: 293.047708 ± 177.758695, best_reward: 398.481917 ± 195.315630 in #224


Epoch #234: 20001it [01:02, 317.50it/s, env_step=4680000, len=2184, n/ep=0, n/st=400, pursuer_0/loss=1.280, pursuer_1/loss=1.327, pursuer_2/loss=1.378, pursuer_3/loss=1.186, pursuer_4/loss=1.383, pursuer_5/loss=1.315, pursuer_6/loss=1.371, pursuer_7/loss=1.331, rew=577.31]                           


Epoch #234: test_reward: 355.200917 ± 202.601968, best_reward: 398.481917 ± 195.315630 in #224


Epoch #235: 20001it [01:03, 314.89it/s, env_step=4700000, len=1808, n/ep=1, n/st=400, pursuer_0/loss=1.331, pursuer_1/loss=1.488, pursuer_2/loss=1.601, pursuer_3/loss=1.384, pursuer_4/loss=1.250, pursuer_5/loss=1.412, pursuer_6/loss=1.493, pursuer_7/loss=1.428, rew=315.04]                           


Epoch #235: test_reward: 169.767792 ± 169.305683, best_reward: 398.481917 ± 195.315630 in #224


Epoch #236: 20001it [01:02, 317.93it/s, env_step=4720000, len=1152, n/ep=0, n/st=400, pursuer_0/loss=1.323, pursuer_1/loss=1.412, pursuer_2/loss=1.484, pursuer_3/loss=1.341, pursuer_4/loss=1.286, pursuer_5/loss=1.551, pursuer_6/loss=1.516, pursuer_7/loss=1.305, rew=619.08]                           


Epoch #236: test_reward: 301.111167 ± 179.955016, best_reward: 398.481917 ± 195.315630 in #224


Epoch #237: 20001it [01:02, 318.21it/s, env_step=4740000, len=2512, n/ep=0, n/st=400, pursuer_0/loss=1.346, pursuer_1/loss=1.422, pursuer_2/loss=1.411, pursuer_3/loss=1.388, pursuer_4/loss=1.304, pursuer_5/loss=1.405, pursuer_6/loss=1.527, pursuer_7/loss=1.413, rew=252.22]                           


Steps Policy Saved  1790
Epoch #237: test_reward: 328.308750 ± 204.885935, best_reward: 398.481917 ± 195.315630 in #224


Epoch #238: 20001it [01:01, 325.60it/s, env_step=4760000, len=816, n/ep=1, n/st=400, pursuer_0/loss=1.378, pursuer_1/loss=1.462, pursuer_2/loss=1.364, pursuer_3/loss=1.373, pursuer_4/loss=1.267, pursuer_5/loss=1.449, pursuer_6/loss=1.438, pursuer_7/loss=1.547, rew=688.26]                            


Steps Policy Saved  1800
Epoch #238: test_reward: 312.307083 ± 143.765016, best_reward: 398.481917 ± 195.315630 in #224


Epoch #239: 20001it [01:02, 318.25it/s, env_step=4780000, len=1848, n/ep=0, n/st=400, pursuer_0/loss=1.296, pursuer_1/loss=1.340, pursuer_2/loss=1.432, pursuer_3/loss=1.315, pursuer_4/loss=1.277, pursuer_5/loss=1.284, pursuer_6/loss=1.524, pursuer_7/loss=1.487, rew=416.10]                           


Epoch #239: test_reward: 287.366208 ± 195.078438, best_reward: 398.481917 ± 195.315630 in #224


Epoch #240: 20001it [01:02, 317.70it/s, env_step=4800000, len=1404, n/ep=0, n/st=400, pursuer_0/loss=1.240, pursuer_1/loss=1.376, pursuer_2/loss=1.485, pursuer_3/loss=1.420, pursuer_4/loss=1.426, pursuer_5/loss=1.431, pursuer_6/loss=1.525, pursuer_7/loss=1.639, rew=492.87]                           


Epoch #240: test_reward: 309.871125 ± 150.283286, best_reward: 398.481917 ± 195.315630 in #224


Epoch #241: 20001it [01:03, 312.61it/s, env_step=4820000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.303, pursuer_1/loss=1.518, pursuer_2/loss=1.523, pursuer_3/loss=1.336, pursuer_4/loss=1.540, pursuer_5/loss=1.315, pursuer_6/loss=1.543, pursuer_7/loss=1.545, rew=189.71]                           


Epoch #241: test_reward: 260.992625 ± 137.865684, best_reward: 398.481917 ± 195.315630 in #224


Epoch #242: 20001it [01:03, 314.14it/s, env_step=4840000, len=1200, n/ep=0, n/st=400, pursuer_0/loss=1.347, pursuer_1/loss=1.565, pursuer_2/loss=1.374, pursuer_3/loss=1.520, pursuer_4/loss=1.351, pursuer_5/loss=1.530, pursuer_6/loss=1.384, pursuer_7/loss=1.461, rew=608.58]                           


Epoch #242: test_reward: 346.468417 ± 215.838287, best_reward: 398.481917 ± 195.315630 in #224


Epoch #243: 20001it [01:03, 317.16it/s, env_step=4860000, len=1384, n/ep=0, n/st=400, pursuer_0/loss=1.386, pursuer_1/loss=1.562, pursuer_2/loss=1.466, pursuer_3/loss=1.618, pursuer_4/loss=1.419, pursuer_5/loss=1.509, pursuer_6/loss=1.510, pursuer_7/loss=1.523, rew=397.33]                           


Epoch #243: test_reward: 241.446083 ± 193.771740, best_reward: 398.481917 ± 195.315630 in #224


Epoch #244: 20001it [01:01, 325.50it/s, env_step=4880000, len=1776, n/ep=1, n/st=400, pursuer_0/loss=1.493, pursuer_1/loss=1.485, pursuer_2/loss=1.587, pursuer_3/loss=1.528, pursuer_4/loss=1.495, pursuer_5/loss=1.441, pursuer_6/loss=1.582, pursuer_7/loss=1.562, rew=337.07]                           


Epoch #244: test_reward: 342.665625 ± 177.297999, best_reward: 398.481917 ± 195.315630 in #224


Epoch #245: 20001it [01:04, 309.83it/s, env_step=4900000, len=1376, n/ep=0, n/st=400, pursuer_0/loss=1.254, pursuer_1/loss=1.495, pursuer_2/loss=1.464, pursuer_3/loss=1.459, pursuer_4/loss=1.352, pursuer_5/loss=1.509, pursuer_6/loss=1.548, pursuer_7/loss=1.528, rew=441.38]                           


Epoch #245: test_reward: 300.684042 ± 189.900770, best_reward: 398.481917 ± 195.315630 in #224


Epoch #246: 20001it [01:00, 329.12it/s, env_step=4920000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.369, pursuer_1/loss=1.523, pursuer_2/loss=1.517, pursuer_3/loss=1.587, pursuer_4/loss=1.479, pursuer_5/loss=1.487, pursuer_6/loss=1.530, pursuer_7/loss=1.546, rew=183.62]                           


Epoch #246: test_reward: 217.154208 ± 136.274096, best_reward: 398.481917 ± 195.315630 in #224


Epoch #247: 20001it [01:01, 322.85it/s, env_step=4940000, len=1208, n/ep=0, n/st=400, pursuer_0/loss=1.363, pursuer_1/loss=1.482, pursuer_2/loss=1.521, pursuer_3/loss=1.497, pursuer_4/loss=1.521, pursuer_5/loss=1.493, pursuer_6/loss=1.661, pursuer_7/loss=1.673, rew=358.39]                           


Epoch #247: test_reward: 303.550375 ± 199.173420, best_reward: 398.481917 ± 195.315630 in #224


Epoch #248: 20001it [01:03, 317.17it/s, env_step=4960000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.337, pursuer_1/loss=1.366, pursuer_2/loss=1.518, pursuer_3/loss=1.433, pursuer_4/loss=1.481, pursuer_5/loss=1.462, pursuer_6/loss=1.489, pursuer_7/loss=1.548, rew=241.45]                           


Epoch #248: test_reward: 316.734500 ± 182.204163, best_reward: 398.481917 ± 195.315630 in #224


Epoch #249: 20001it [00:59, 335.11it/s, env_step=4980000, len=1512, n/ep=0, n/st=400, pursuer_0/loss=1.297, pursuer_1/loss=1.261, pursuer_2/loss=1.427, pursuer_3/loss=1.577, pursuer_4/loss=1.420, pursuer_5/loss=1.278, pursuer_6/loss=1.561, pursuer_7/loss=1.396, rew=461.08]                           


Epoch #249: test_reward: 384.663833 ± 202.356328, best_reward: 398.481917 ± 195.315630 in #224


Epoch #250: 20001it [01:03, 312.79it/s, env_step=5000000, len=1216, n/ep=0, n/st=400, pursuer_0/loss=1.179, pursuer_1/loss=1.443, pursuer_2/loss=1.445, pursuer_3/loss=1.530, pursuer_4/loss=1.350, pursuer_5/loss=1.417, pursuer_6/loss=1.411, pursuer_7/loss=1.492, rew=338.27]                           


Epoch #250: test_reward: 189.528542 ± 156.655576, best_reward: 398.481917 ± 195.315630 in #224


Epoch #251: 20001it [01:00, 330.71it/s, env_step=5020000, len=2648, n/ep=0, n/st=400, pursuer_0/loss=1.343, pursuer_1/loss=1.531, pursuer_2/loss=1.322, pursuer_3/loss=1.373, pursuer_4/loss=1.388, pursuer_5/loss=1.531, pursuer_6/loss=1.443, pursuer_7/loss=1.622, rew=388.66]                           


Epoch #251: test_reward: 334.178083 ± 175.926539, best_reward: 398.481917 ± 195.315630 in #224


Epoch #252: 20001it [01:04, 310.98it/s, env_step=5040000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.345, pursuer_1/loss=1.346, pursuer_2/loss=1.471, pursuer_3/loss=1.573, pursuer_4/loss=1.387, pursuer_5/loss=1.406, pursuer_6/loss=1.486, pursuer_7/loss=1.663, rew=128.79]                           


Best Saved Rew 1918
Epoch #252: test_reward: 436.195333 ± 186.940356, best_reward: 436.195333 ± 186.940356 in #252


Epoch #253: 20001it [01:00, 329.22it/s, env_step=5060000, len=2640, n/ep=0, n/st=400, pursuer_0/loss=1.468, pursuer_1/loss=1.306, pursuer_2/loss=1.382, pursuer_3/loss=1.573, pursuer_4/loss=1.374, pursuer_5/loss=1.426, pursuer_6/loss=1.456, pursuer_7/loss=1.545, rew=325.58]                           


Epoch #253: test_reward: 268.346667 ± 154.566833, best_reward: 436.195333 ± 186.940356 in #252


Epoch #254: 20001it [01:03, 317.40it/s, env_step=5080000, len=1232, n/ep=0, n/st=400, pursuer_0/loss=1.226, pursuer_1/loss=1.333, pursuer_2/loss=1.238, pursuer_3/loss=1.331, pursuer_4/loss=1.258, pursuer_5/loss=1.217, pursuer_6/loss=1.414, pursuer_7/loss=1.606, rew=661.67]                           


Epoch #254: test_reward: 329.493083 ± 174.886584, best_reward: 436.195333 ± 186.940356 in #252


Epoch #255: 20001it [01:01, 325.81it/s, env_step=5100000, len=928, n/ep=0, n/st=400, pursuer_0/loss=1.165, pursuer_1/loss=1.351, pursuer_2/loss=1.210, pursuer_3/loss=1.386, pursuer_4/loss=1.285, pursuer_5/loss=1.334, pursuer_6/loss=1.316, pursuer_7/loss=1.371, rew=460.50]                            


Epoch #255: test_reward: 262.005542 ± 143.472010, best_reward: 436.195333 ± 186.940356 in #252


Epoch #256: 20001it [01:01, 323.55it/s, env_step=5120000, len=2072, n/ep=0, n/st=400, pursuer_0/loss=1.359, pursuer_1/loss=1.380, pursuer_2/loss=1.337, pursuer_3/loss=1.537, pursuer_4/loss=1.363, pursuer_5/loss=1.421, pursuer_6/loss=1.296, pursuer_7/loss=1.409, rew=579.25]                           


Steps Policy Saved  1950
Epoch #256: test_reward: 315.375542 ± 237.561740, best_reward: 436.195333 ± 186.940356 in #252


Epoch #257: 20001it [01:02, 322.56it/s, env_step=5140000, len=1784, n/ep=0, n/st=400, pursuer_0/loss=1.172, pursuer_1/loss=1.289, pursuer_2/loss=1.314, pursuer_3/loss=1.423, pursuer_4/loss=1.300, pursuer_5/loss=1.242, pursuer_6/loss=1.376, pursuer_7/loss=1.272, rew=525.26]                           


Steps Policy Saved  1960
Best Saved Rew 1961
Epoch #257: test_reward: 453.121708 ± 197.576760, best_reward: 453.121708 ± 197.576760 in #257


Epoch #258: 20001it [01:02, 319.83it/s, env_step=5160000, len=1608, n/ep=0, n/st=400, pursuer_0/loss=1.326, pursuer_1/loss=1.341, pursuer_2/loss=1.484, pursuer_3/loss=1.440, pursuer_4/loss=1.364, pursuer_5/loss=1.344, pursuer_6/loss=1.325, pursuer_7/loss=1.595, rew=403.34]                           


Epoch #258: test_reward: 343.540750 ± 197.686199, best_reward: 453.121708 ± 197.576760 in #257


Epoch #259: 20001it [01:06, 299.07it/s, env_step=5180000, len=1984, n/ep=0, n/st=400, pursuer_0/loss=1.187, pursuer_1/loss=1.313, pursuer_2/loss=1.249, pursuer_3/loss=1.339, pursuer_4/loss=1.333, pursuer_5/loss=1.364, pursuer_6/loss=1.381, pursuer_7/loss=1.498, rew=376.94]                           


Epoch #259: test_reward: 370.078458 ± 194.885162, best_reward: 453.121708 ± 197.576760 in #257


Epoch #260: 20001it [01:02, 320.62it/s, env_step=5200000, len=1552, n/ep=1, n/st=400, pursuer_0/loss=1.390, pursuer_1/loss=1.468, pursuer_2/loss=1.382, pursuer_3/loss=1.585, pursuer_4/loss=1.445, pursuer_5/loss=1.282, pursuer_6/loss=1.341, pursuer_7/loss=1.471, rew=391.28]                           


Steps Policy Saved  1990
Epoch #260: test_reward: 354.216542 ± 204.809779, best_reward: 453.121708 ± 197.576760 in #257


Epoch #261: 20001it [01:02, 320.90it/s, env_step=5220000, len=2680, n/ep=0, n/st=400, pursuer_0/loss=1.354, pursuer_1/loss=1.408, pursuer_2/loss=1.216, pursuer_3/loss=1.527, pursuer_4/loss=1.347, pursuer_5/loss=1.401, pursuer_6/loss=1.451, pursuer_7/loss=1.590, rew=360.95]                           


Steps Policy Saved  2000
Epoch #261: test_reward: 355.814333 ± 178.104569, best_reward: 453.121708 ± 197.576760 in #257


Epoch #262: 20001it [01:02, 318.17it/s, env_step=5240000, len=1936, n/ep=0, n/st=400, pursuer_0/loss=1.350, pursuer_1/loss=1.251, pursuer_2/loss=1.298, pursuer_3/loss=1.395, pursuer_4/loss=1.348, pursuer_5/loss=1.362, pursuer_6/loss=1.352, pursuer_7/loss=1.498, rew=426.54]                           


Epoch #262: test_reward: 367.294042 ± 163.750018, best_reward: 453.121708 ± 197.576760 in #257


Epoch #263: 20001it [01:02, 322.26it/s, env_step=5260000, len=1592, n/ep=0, n/st=400, pursuer_0/loss=1.216, pursuer_1/loss=1.461, pursuer_2/loss=1.489, pursuer_3/loss=1.353, pursuer_4/loss=1.250, pursuer_5/loss=1.446, pursuer_6/loss=1.359, pursuer_7/loss=1.451, rew=410.03]                           


Epoch #263: test_reward: 264.509458 ± 220.610184, best_reward: 453.121708 ± 197.576760 in #257


Epoch #264: 20001it [01:01, 325.04it/s, env_step=5280000, len=1512, n/ep=1, n/st=400, pursuer_0/loss=1.294, pursuer_1/loss=1.296, pursuer_2/loss=1.393, pursuer_3/loss=1.348, pursuer_4/loss=1.248, pursuer_5/loss=1.425, pursuer_6/loss=1.521, pursuer_7/loss=1.341, rew=508.50]                           


Epoch #264: test_reward: 414.554042 ± 165.657657, best_reward: 453.121708 ± 197.576760 in #257


Epoch #265: 20001it [01:06, 300.40it/s, env_step=5300000, len=1416, n/ep=0, n/st=400, pursuer_0/loss=1.268, pursuer_1/loss=1.400, pursuer_2/loss=1.357, pursuer_3/loss=1.382, pursuer_4/loss=1.296, pursuer_5/loss=1.313, pursuer_6/loss=1.545, pursuer_7/loss=1.456, rew=618.00]                           


Epoch #265: test_reward: 215.832208 ± 199.700710, best_reward: 453.121708 ± 197.576760 in #257


Epoch #266: 20001it [01:05, 307.35it/s, env_step=5320000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.425, pursuer_1/loss=1.318, pursuer_2/loss=1.323, pursuer_3/loss=1.463, pursuer_4/loss=1.416, pursuer_5/loss=1.434, pursuer_6/loss=1.590, pursuer_7/loss=1.568, rew=305.54]                           


Epoch #266: test_reward: 318.489000 ± 201.924696, best_reward: 453.121708 ± 197.576760 in #257


Epoch #267: 20001it [01:05, 304.06it/s, env_step=5340000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.304, pursuer_1/loss=1.438, pursuer_2/loss=1.418, pursuer_3/loss=1.402, pursuer_4/loss=1.455, pursuer_5/loss=1.467, pursuer_6/loss=1.429, pursuer_7/loss=1.620, rew=309.19]                           


Epoch #267: test_reward: 375.932083 ± 208.210752, best_reward: 453.121708 ± 197.576760 in #257


Epoch #268: 20001it [01:06, 301.05it/s, env_step=5360000, len=1208, n/ep=0, n/st=400, pursuer_0/loss=1.342, pursuer_1/loss=1.558, pursuer_2/loss=1.464, pursuer_3/loss=1.630, pursuer_4/loss=1.406, pursuer_5/loss=1.512, pursuer_6/loss=1.550, pursuer_7/loss=1.565, rew=628.58]                           


Epoch #268: test_reward: 373.271417 ± 173.560886, best_reward: 453.121708 ± 197.576760 in #257


Epoch #269: 20001it [01:05, 304.04it/s, env_step=5380000, len=1448, n/ep=0, n/st=400, pursuer_0/loss=1.327, pursuer_1/loss=1.333, pursuer_2/loss=1.402, pursuer_3/loss=1.407, pursuer_4/loss=1.327, pursuer_5/loss=1.343, pursuer_6/loss=1.461, pursuer_7/loss=1.439, rew=553.34]                           


Epoch #269: test_reward: 315.148500 ± 209.668437, best_reward: 453.121708 ± 197.576760 in #257


Epoch #270: 20001it [01:04, 311.08it/s, env_step=5400000, len=2952, n/ep=0, n/st=400, pursuer_0/loss=1.319, pursuer_1/loss=1.428, pursuer_2/loss=1.277, pursuer_3/loss=1.438, pursuer_4/loss=1.473, pursuer_5/loss=1.505, pursuer_6/loss=1.539, pursuer_7/loss=1.607, rew=261.57]                           


Epoch #270: test_reward: 313.103958 ± 174.997859, best_reward: 453.121708 ± 197.576760 in #257


Epoch #271: 20001it [01:03, 317.30it/s, env_step=5420000, len=840, n/ep=1, n/st=400, pursuer_0/loss=1.207, pursuer_1/loss=1.432, pursuer_2/loss=1.348, pursuer_3/loss=1.454, pursuer_4/loss=1.380, pursuer_5/loss=1.457, pursuer_6/loss=1.418, pursuer_7/loss=1.313, rew=604.61]                            


Epoch #271: test_reward: 406.702292 ± 181.352181, best_reward: 453.121708 ± 197.576760 in #257


Epoch #272: 20001it [01:03, 313.31it/s, env_step=5440000, len=864, n/ep=0, n/st=400, pursuer_0/loss=1.277, pursuer_1/loss=1.448, pursuer_2/loss=1.309, pursuer_3/loss=1.533, pursuer_4/loss=1.341, pursuer_5/loss=1.388, pursuer_6/loss=1.508, pursuer_7/loss=1.640, rew=722.69]                            


Epoch #272: test_reward: 323.554833 ± 203.750745, best_reward: 453.121708 ± 197.576760 in #257


Epoch #273: 20001it [01:02, 321.68it/s, env_step=5460000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.338, pursuer_1/loss=1.464, pursuer_2/loss=1.401, pursuer_3/loss=1.503, pursuer_4/loss=1.391, pursuer_5/loss=1.569, pursuer_6/loss=1.459, pursuer_7/loss=1.481, rew=113.72]                           


Epoch #273: test_reward: 325.012708 ± 226.030062, best_reward: 453.121708 ± 197.576760 in #257


Epoch #274: 20001it [01:03, 316.80it/s, env_step=5480000, len=1600, n/ep=0, n/st=400, pursuer_0/loss=1.216, pursuer_1/loss=1.387, pursuer_2/loss=1.386, pursuer_3/loss=1.624, pursuer_4/loss=1.342, pursuer_5/loss=1.401, pursuer_6/loss=1.408, pursuer_7/loss=1.621, rew=355.46]                           


Steps Policy Saved  2120
Epoch #274: test_reward: 294.841500 ± 154.260193, best_reward: 453.121708 ± 197.576760 in #257


Epoch #275: 20001it [01:01, 324.87it/s, env_step=5500000, len=3072, n/ep=0, n/st=400, pursuer_0/loss=1.322, pursuer_1/loss=1.385, pursuer_2/loss=1.473, pursuer_3/loss=1.546, pursuer_4/loss=1.393, pursuer_5/loss=1.409, pursuer_6/loss=1.325, pursuer_7/loss=1.358, rew=281.58]                           


Steps Policy Saved  2130
Epoch #275: test_reward: 399.196958 ± 189.780007, best_reward: 453.121708 ± 197.576760 in #257


Epoch #276: 20001it [01:00, 330.53it/s, env_step=5520000, len=2512, n/ep=0, n/st=400, pursuer_0/loss=1.208, pursuer_1/loss=1.247, pursuer_2/loss=1.374, pursuer_3/loss=1.529, pursuer_4/loss=1.314, pursuer_5/loss=1.465, pursuer_6/loss=1.546, pursuer_7/loss=1.628, rew=367.48]                           


Epoch #276: test_reward: 314.396708 ± 175.447779, best_reward: 453.121708 ± 197.576760 in #257


Epoch #277: 20001it [01:01, 327.25it/s, env_step=5540000, len=1176, n/ep=0, n/st=400, pursuer_0/loss=1.288, pursuer_1/loss=1.349, pursuer_2/loss=1.347, pursuer_3/loss=1.489, pursuer_4/loss=1.228, pursuer_5/loss=1.357, pursuer_6/loss=1.354, pursuer_7/loss=1.435, rew=576.16]                           


Epoch #277: test_reward: 323.532208 ± 221.791840, best_reward: 453.121708 ± 197.576760 in #257


Epoch #278: 20001it [01:04, 311.03it/s, env_step=5560000, len=896, n/ep=0, n/st=400, pursuer_0/loss=1.221, pursuer_1/loss=1.363, pursuer_2/loss=1.376, pursuer_3/loss=1.475, pursuer_4/loss=1.385, pursuer_5/loss=1.336, pursuer_6/loss=1.491, pursuer_7/loss=1.518, rew=414.37]                            


Epoch #278: test_reward: 346.816500 ± 155.678752, best_reward: 453.121708 ± 197.576760 in #257


Epoch #279: 20001it [01:01, 324.74it/s, env_step=5580000, len=1632, n/ep=0, n/st=400, pursuer_0/loss=1.247, pursuer_1/loss=1.377, pursuer_2/loss=1.483, pursuer_3/loss=1.501, pursuer_4/loss=1.402, pursuer_5/loss=1.337, pursuer_6/loss=1.458, pursuer_7/loss=1.648, rew=525.99]                           


Epoch #279: test_reward: 362.351667 ± 133.956329, best_reward: 453.121708 ± 197.576760 in #257


Epoch #280: 20001it [01:02, 319.75it/s, env_step=5600000, len=1352, n/ep=0, n/st=400, pursuer_0/loss=1.326, pursuer_1/loss=1.465, pursuer_2/loss=1.427, pursuer_3/loss=1.437, pursuer_4/loss=1.543, pursuer_5/loss=1.472, pursuer_6/loss=1.614, pursuer_7/loss=1.592, rew=429.61]                           


Epoch #280: test_reward: 308.095333 ± 144.671191, best_reward: 453.121708 ± 197.576760 in #257


Epoch #281: 20001it [01:02, 317.73it/s, env_step=5620000, len=2056, n/ep=0, n/st=400, pursuer_0/loss=1.380, pursuer_1/loss=1.403, pursuer_2/loss=1.509, pursuer_3/loss=1.576, pursuer_4/loss=1.366, pursuer_5/loss=1.420, pursuer_6/loss=1.625, pursuer_7/loss=1.455, rew=469.08]                           


Epoch #281: test_reward: 382.150667 ± 162.947908, best_reward: 453.121708 ± 197.576760 in #257


Epoch #282: 20001it [01:01, 327.01it/s, env_step=5640000, len=1192, n/ep=2, n/st=400, pursuer_0/loss=1.242, pursuer_1/loss=1.388, pursuer_2/loss=1.436, pursuer_3/loss=1.454, pursuer_4/loss=1.424, pursuer_5/loss=1.401, pursuer_6/loss=1.492, pursuer_7/loss=1.654, rew=600.79]                           


Epoch #282: test_reward: 291.841292 ± 148.104486, best_reward: 453.121708 ± 197.576760 in #257


Epoch #283: 20001it [01:03, 315.25it/s, env_step=5660000, len=1600, n/ep=0, n/st=400, pursuer_0/loss=1.217, pursuer_1/loss=1.299, pursuer_2/loss=1.346, pursuer_3/loss=1.351, pursuer_4/loss=1.399, pursuer_5/loss=1.330, pursuer_6/loss=1.499, pursuer_7/loss=1.487, rew=410.15]                           


Epoch #283: test_reward: 243.826458 ± 194.092087, best_reward: 453.121708 ± 197.576760 in #257


Epoch #284: 20001it [00:59, 335.39it/s, env_step=5680000, len=2112, n/ep=0, n/st=400, pursuer_0/loss=1.260, pursuer_1/loss=1.289, pursuer_2/loss=1.361, pursuer_3/loss=1.429, pursuer_4/loss=1.446, pursuer_5/loss=1.472, pursuer_6/loss=1.520, pursuer_7/loss=1.543, rew=354.42]                           


Best Saved Rew 2212
Epoch #284: test_reward: 464.285375 ± 183.888538, best_reward: 464.285375 ± 183.888538 in #284


Epoch #285: 20001it [01:02, 322.23it/s, env_step=5700000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.297, pursuer_1/loss=1.388, pursuer_2/loss=1.591, pursuer_3/loss=1.426, pursuer_4/loss=1.450, pursuer_5/loss=1.442, pursuer_6/loss=1.515, pursuer_7/loss=1.522, rew=285.10]                           


Steps Policy Saved  2220
Epoch #285: test_reward: 277.167083 ± 186.511241, best_reward: 464.285375 ± 183.888538 in #284


Epoch #286: 20001it [01:01, 326.17it/s, env_step=5720000, len=2224, n/ep=0, n/st=400, pursuer_0/loss=1.369, pursuer_1/loss=1.268, pursuer_2/loss=1.332, pursuer_3/loss=1.400, pursuer_4/loss=1.363, pursuer_5/loss=1.394, pursuer_6/loss=1.326, pursuer_7/loss=1.416, rew=397.88]                           


Epoch #286: test_reward: 384.310083 ± 205.989111, best_reward: 464.285375 ± 183.888538 in #284


Epoch #287: 20001it [01:02, 321.99it/s, env_step=5740000, len=1136, n/ep=0, n/st=400, pursuer_0/loss=1.210, pursuer_1/loss=1.279, pursuer_2/loss=1.339, pursuer_3/loss=1.415, pursuer_4/loss=1.388, pursuer_5/loss=1.375, pursuer_6/loss=1.406, pursuer_7/loss=1.640, rew=416.13]                           


Epoch #287: test_reward: 292.341375 ± 166.065092, best_reward: 464.285375 ± 183.888538 in #284


Epoch #288: 20001it [01:02, 318.59it/s, env_step=5760000, len=2176, n/ep=0, n/st=400, pursuer_0/loss=1.280, pursuer_1/loss=1.260, pursuer_2/loss=1.493, pursuer_3/loss=1.355, pursuer_4/loss=1.466, pursuer_5/loss=1.392, pursuer_6/loss=1.419, pursuer_7/loss=1.516, rew=492.10]                           


Epoch #288: test_reward: 331.382875 ± 181.914121, best_reward: 464.285375 ± 183.888538 in #284


Epoch #289: 20001it [01:04, 310.60it/s, env_step=5780000, len=3264, n/ep=0, n/st=400, pursuer_0/loss=1.449, pursuer_1/loss=1.444, pursuer_2/loss=1.466, pursuer_3/loss=1.518, pursuer_4/loss=1.350, pursuer_5/loss=1.469, pursuer_6/loss=1.520, pursuer_7/loss=1.438, rew=209.01]                           


Epoch #289: test_reward: 398.882667 ± 156.063460, best_reward: 464.285375 ± 183.888538 in #284


Epoch #290: 20001it [01:02, 321.68it/s, env_step=5800000, len=1600, n/ep=0, n/st=400, pursuer_0/loss=1.343, pursuer_1/loss=1.285, pursuer_2/loss=1.438, pursuer_3/loss=1.409, pursuer_4/loss=1.499, pursuer_5/loss=1.502, pursuer_6/loss=1.405, pursuer_7/loss=1.436, rew=439.72]                           


Epoch #290: test_reward: 209.737042 ± 206.399869, best_reward: 464.285375 ± 183.888538 in #284


Epoch #291: 20001it [01:00, 328.35it/s, env_step=5820000, len=3032, n/ep=0, n/st=400, pursuer_0/loss=1.418, pursuer_1/loss=1.425, pursuer_2/loss=1.352, pursuer_3/loss=1.460, pursuer_4/loss=1.486, pursuer_5/loss=1.482, pursuer_6/loss=1.694, pursuer_7/loss=1.626, rew=522.68]                           


Epoch #291: test_reward: 295.232583 ± 174.405475, best_reward: 464.285375 ± 183.888538 in #284


Epoch #292: 20001it [01:05, 305.75it/s, env_step=5840000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.347, pursuer_1/loss=1.334, pursuer_2/loss=1.436, pursuer_3/loss=1.367, pursuer_4/loss=1.434, pursuer_5/loss=1.478, pursuer_6/loss=1.444, pursuer_7/loss=1.592, rew=125.76]                           


Epoch #292: test_reward: 218.423917 ± 150.998070, best_reward: 464.285375 ± 183.888538 in #284


Epoch #293: 20001it [01:02, 319.76it/s, env_step=5860000, len=2272, n/ep=1, n/st=400, pursuer_0/loss=1.403, pursuer_1/loss=1.454, pursuer_2/loss=1.439, pursuer_3/loss=1.463, pursuer_4/loss=1.501, pursuer_5/loss=1.403, pursuer_6/loss=1.485, pursuer_7/loss=1.532, rew=292.93]                           


Epoch #293: test_reward: 414.187083 ± 173.714047, best_reward: 464.285375 ± 183.888538 in #284


Epoch #294: 20001it [01:01, 322.67it/s, env_step=5880000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.275, pursuer_1/loss=1.300, pursuer_2/loss=1.368, pursuer_3/loss=1.486, pursuer_4/loss=1.502, pursuer_5/loss=1.476, pursuer_6/loss=1.398, pursuer_7/loss=1.480, rew=269.08]                           


Epoch #294: test_reward: 356.422000 ± 184.772723, best_reward: 464.285375 ± 183.888538 in #284


Epoch #295: 20001it [01:02, 318.95it/s, env_step=5900000, len=1232, n/ep=1, n/st=400, pursuer_0/loss=1.254, pursuer_1/loss=1.249, pursuer_2/loss=1.432, pursuer_3/loss=1.479, pursuer_4/loss=1.218, pursuer_5/loss=1.349, pursuer_6/loss=1.363, pursuer_7/loss=1.441, rew=585.73]                           


Epoch #295: test_reward: 362.621333 ± 149.666253, best_reward: 464.285375 ± 183.888538 in #284


Epoch #296: 20001it [01:02, 320.34it/s, env_step=5920000, len=1936, n/ep=0, n/st=400, pursuer_0/loss=1.183, pursuer_1/loss=1.363, pursuer_2/loss=1.327, pursuer_3/loss=1.475, pursuer_4/loss=1.315, pursuer_5/loss=1.386, pursuer_6/loss=1.337, pursuer_7/loss=1.576, rew=521.59]                           


Epoch #296: test_reward: 420.602667 ± 200.740788, best_reward: 464.285375 ± 183.888538 in #284


Epoch #297: 20001it [01:02, 318.14it/s, env_step=5940000, len=2572, n/ep=0, n/st=400, pursuer_0/loss=1.287, pursuer_1/loss=1.354, pursuer_2/loss=1.285, pursuer_3/loss=1.330, pursuer_4/loss=1.288, pursuer_5/loss=1.396, pursuer_6/loss=1.359, pursuer_7/loss=1.429, rew=289.22]                           


Epoch #297: test_reward: 424.331792 ± 215.449119, best_reward: 464.285375 ± 183.888538 in #284


Epoch #298: 20001it [01:01, 325.15it/s, env_step=5960000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.176, pursuer_1/loss=1.293, pursuer_2/loss=1.277, pursuer_3/loss=1.423, pursuer_4/loss=1.185, pursuer_5/loss=1.290, pursuer_6/loss=1.339, pursuer_7/loss=1.487, rew=353.50]                           


Epoch #298: test_reward: 362.542333 ± 218.499812, best_reward: 464.285375 ± 183.888538 in #284


Epoch #299: 20001it [01:01, 323.16it/s, env_step=5980000, len=1808, n/ep=0, n/st=400, pursuer_0/loss=1.102, pursuer_1/loss=1.325, pursuer_2/loss=1.399, pursuer_3/loss=1.487, pursuer_4/loss=1.209, pursuer_5/loss=1.385, pursuer_6/loss=1.466, pursuer_7/loss=1.496, rew=369.61]                           


Epoch #299: test_reward: 334.701958 ± 162.778652, best_reward: 464.285375 ± 183.888538 in #284


Epoch #300: 20001it [01:03, 317.19it/s, env_step=6000000, len=992, n/ep=0, n/st=400, pursuer_0/loss=1.216, pursuer_1/loss=1.309, pursuer_2/loss=1.324, pursuer_3/loss=1.436, pursuer_4/loss=1.256, pursuer_5/loss=1.277, pursuer_6/loss=1.440, pursuer_7/loss=1.404, rew=375.70]                            


Epoch #300: test_reward: 357.381958 ± 188.818879, best_reward: 464.285375 ± 183.888538 in #284


Epoch #301: 20001it [01:05, 305.08it/s, env_step=6020000, len=2840, n/ep=0, n/st=400, pursuer_0/loss=1.251, pursuer_1/loss=1.351, pursuer_2/loss=1.515, pursuer_3/loss=1.482, pursuer_4/loss=1.307, pursuer_5/loss=1.418, pursuer_6/loss=1.391, pursuer_7/loss=1.733, rew=481.48]                           


Epoch #301: test_reward: 333.095875 ± 187.397662, best_reward: 464.285375 ± 183.888538 in #284


Epoch #302: 20001it [01:02, 322.06it/s, env_step=6040000, len=960, n/ep=0, n/st=400, pursuer_0/loss=1.176, pursuer_1/loss=1.469, pursuer_2/loss=1.294, pursuer_3/loss=1.446, pursuer_4/loss=1.290, pursuer_5/loss=1.379, pursuer_6/loss=1.518, pursuer_7/loss=1.458, rew=447.69]                            


Epoch #302: test_reward: 403.731083 ± 184.674128, best_reward: 464.285375 ± 183.888538 in #284


Epoch #303: 20001it [00:59, 334.64it/s, env_step=6060000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.160, pursuer_1/loss=1.347, pursuer_2/loss=1.494, pursuer_3/loss=1.477, pursuer_4/loss=1.351, pursuer_5/loss=1.350, pursuer_6/loss=1.502, pursuer_7/loss=1.425, rew=309.82]                           


Epoch #303: test_reward: 422.658167 ± 174.326141, best_reward: 464.285375 ± 183.888538 in #284


Epoch #304: 20001it [01:05, 303.32it/s, env_step=6080000, len=3048, n/ep=0, n/st=400, pursuer_0/loss=1.219, pursuer_1/loss=1.418, pursuer_2/loss=1.368, pursuer_3/loss=1.569, pursuer_4/loss=1.270, pursuer_5/loss=1.361, pursuer_6/loss=1.428, pursuer_7/loss=1.494, rew=302.16]                           


Epoch #304: test_reward: 319.761458 ± 195.133226, best_reward: 464.285375 ± 183.888538 in #284


Epoch #305: 20001it [01:03, 313.70it/s, env_step=6100000, len=1464, n/ep=0, n/st=400, pursuer_0/loss=1.070, pursuer_1/loss=1.243, pursuer_2/loss=1.355, pursuer_3/loss=1.456, pursuer_4/loss=1.238, pursuer_5/loss=1.264, pursuer_6/loss=1.415, pursuer_7/loss=1.513, rew=399.84]                           


Best Saved Rew 2403
Epoch #305: test_reward: 466.116958 ± 171.863003, best_reward: 466.116958 ± 171.863003 in #305


Epoch #306: 20001it [01:03, 315.23it/s, env_step=6120000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.288, pursuer_1/loss=1.380, pursuer_2/loss=1.431, pursuer_3/loss=1.672, pursuer_4/loss=1.458, pursuer_5/loss=1.294, pursuer_6/loss=1.527, pursuer_7/loss=1.556, rew=235.59]                           


Epoch #306: test_reward: 363.357000 ± 208.917437, best_reward: 466.116958 ± 171.863003 in #305


Epoch #307: 20001it [01:04, 311.82it/s, env_step=6140000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.158, pursuer_1/loss=1.294, pursuer_2/loss=1.378, pursuer_3/loss=1.416, pursuer_4/loss=1.245, pursuer_5/loss=1.331, pursuer_6/loss=1.441, pursuer_7/loss=1.377, rew=135.99]                           


Epoch #307: test_reward: 331.133375 ± 180.579821, best_reward: 466.116958 ± 171.863003 in #305


Epoch #308: 20001it [01:00, 329.89it/s, env_step=6160000, len=760, n/ep=0, n/st=400, pursuer_0/loss=1.305, pursuer_1/loss=1.151, pursuer_2/loss=1.467, pursuer_3/loss=1.438, pursuer_4/loss=1.319, pursuer_5/loss=1.437, pursuer_6/loss=1.534, pursuer_7/loss=1.380, rew=497.30]                            


Epoch #308: test_reward: 444.631542 ± 173.435075, best_reward: 466.116958 ± 171.863003 in #305


Epoch #309: 20001it [01:03, 316.41it/s, env_step=6180000, len=1400, n/ep=0, n/st=400, pursuer_0/loss=1.246, pursuer_1/loss=1.368, pursuer_2/loss=1.382, pursuer_3/loss=1.574, pursuer_4/loss=1.297, pursuer_5/loss=1.463, pursuer_6/loss=1.515, pursuer_7/loss=1.499, rew=640.09]                           


Epoch #309: test_reward: 427.811250 ± 196.102120, best_reward: 466.116958 ± 171.863003 in #305


Epoch #310: 20001it [01:03, 313.92it/s, env_step=6200000, len=2072, n/ep=0, n/st=400, pursuer_0/loss=1.247, pursuer_1/loss=1.393, pursuer_2/loss=1.466, pursuer_3/loss=1.476, pursuer_4/loss=1.138, pursuer_5/loss=1.308, pursuer_6/loss=1.585, pursuer_7/loss=1.400, rew=291.96]                           


Epoch #310: test_reward: 377.361000 ± 187.154660, best_reward: 466.116958 ± 171.863003 in #305


Epoch #311: 20001it [01:06, 301.65it/s, env_step=6220000, len=1568, n/ep=1, n/st=400, pursuer_0/loss=1.250, pursuer_1/loss=1.246, pursuer_2/loss=1.270, pursuer_3/loss=1.497, pursuer_4/loss=1.224, pursuer_5/loss=1.319, pursuer_6/loss=1.479, pursuer_7/loss=1.379, rew=437.76]                           


Epoch #311: test_reward: 393.007750 ± 162.911426, best_reward: 466.116958 ± 171.863003 in #305


Epoch #312: 20001it [01:08, 292.61it/s, env_step=6240000, len=1336, n/ep=0, n/st=400, pursuer_0/loss=1.225, pursuer_1/loss=1.307, pursuer_2/loss=1.340, pursuer_3/loss=1.349, pursuer_4/loss=1.273, pursuer_5/loss=1.378, pursuer_6/loss=1.416, pursuer_7/loss=1.496, rew=388.82]                           


Epoch #312: test_reward: 461.896958 ± 165.374381, best_reward: 466.116958 ± 171.863003 in #305


Epoch #313: 20001it [01:05, 304.81it/s, env_step=6260000, len=2360, n/ep=0, n/st=400, pursuer_0/loss=1.253, pursuer_1/loss=1.356, pursuer_2/loss=1.431, pursuer_3/loss=1.477, pursuer_4/loss=1.357, pursuer_5/loss=1.410, pursuer_6/loss=1.447, pursuer_7/loss=1.619, rew=374.42]                           


Epoch #313: test_reward: 411.124625 ± 194.890712, best_reward: 466.116958 ± 171.863003 in #305


Epoch #314: 20001it [01:07, 298.23it/s, env_step=6280000, len=3600, n/ep=0, n/st=400, pursuer_0/loss=1.234, pursuer_1/loss=1.345, pursuer_2/loss=1.319, pursuer_3/loss=1.512, pursuer_4/loss=1.248, pursuer_5/loss=1.363, pursuer_6/loss=1.598, pursuer_7/loss=1.396, rew=198.85]                           


Epoch #314: test_reward: 434.865500 ± 151.577848, best_reward: 466.116958 ± 171.863003 in #305


Epoch #315: 20001it [01:07, 296.87it/s, env_step=6300000, len=1104, n/ep=0, n/st=400, pursuer_0/loss=1.191, pursuer_1/loss=1.156, pursuer_2/loss=1.342, pursuer_3/loss=1.530, pursuer_4/loss=1.403, pursuer_5/loss=1.356, pursuer_6/loss=1.375, pursuer_7/loss=1.456, rew=487.66]                           


Epoch #315: test_reward: 366.583667 ± 174.323338, best_reward: 466.116958 ± 171.863003 in #305


Epoch #316: 20001it [01:06, 302.24it/s, env_step=6320000, len=2864, n/ep=1, n/st=400, pursuer_0/loss=1.353, pursuer_1/loss=1.268, pursuer_2/loss=1.356, pursuer_3/loss=1.467, pursuer_4/loss=1.325, pursuer_5/loss=1.386, pursuer_6/loss=1.474, pursuer_7/loss=1.294, rew=538.96]                           


Epoch #316: test_reward: 378.014000 ± 192.366726, best_reward: 466.116958 ± 171.863003 in #305


Epoch #317: 20001it [01:04, 308.73it/s, env_step=6340000, len=1984, n/ep=0, n/st=400, pursuer_0/loss=1.319, pursuer_1/loss=1.282, pursuer_2/loss=1.423, pursuer_3/loss=1.491, pursuer_4/loss=1.415, pursuer_5/loss=1.447, pursuer_6/loss=1.621, pursuer_7/loss=1.424, rew=498.18]                           


Epoch #317: test_reward: 440.075625 ± 196.655812, best_reward: 466.116958 ± 171.863003 in #305


Epoch #318: 20001it [01:01, 324.61it/s, env_step=6360000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.228, pursuer_1/loss=1.283, pursuer_2/loss=1.347, pursuer_3/loss=1.503, pursuer_4/loss=1.374, pursuer_5/loss=1.400, pursuer_6/loss=1.384, pursuer_7/loss=1.485, rew=135.40]                           


Epoch #318: test_reward: 434.044333 ± 178.953095, best_reward: 466.116958 ± 171.863003 in #305


Epoch #319: 20001it [01:02, 318.88it/s, env_step=6380000, len=1208, n/ep=0, n/st=400, pursuer_0/loss=1.188, pursuer_1/loss=1.328, pursuer_2/loss=1.327, pursuer_3/loss=1.419, pursuer_4/loss=1.396, pursuer_5/loss=1.444, pursuer_6/loss=1.534, pursuer_7/loss=1.488, rew=488.57]                           


Steps Policy Saved  2530
Epoch #319: test_reward: 421.543750 ± 191.373617, best_reward: 466.116958 ± 171.863003 in #305


Epoch #320: 20001it [01:02, 319.01it/s, env_step=6400000, len=1928, n/ep=0, n/st=400, pursuer_0/loss=1.191, pursuer_1/loss=1.149, pursuer_2/loss=1.339, pursuer_3/loss=1.505, pursuer_4/loss=1.326, pursuer_5/loss=1.268, pursuer_6/loss=1.430, pursuer_7/loss=1.382, rew=436.38]                           


Epoch #320: test_reward: 459.873792 ± 173.206168, best_reward: 466.116958 ± 171.863003 in #305


Epoch #321: 20001it [01:05, 306.97it/s, env_step=6420000, len=2212, n/ep=0, n/st=400, pursuer_0/loss=1.210, pursuer_1/loss=1.146, pursuer_2/loss=1.300, pursuer_3/loss=1.428, pursuer_4/loss=1.310, pursuer_5/loss=1.357, pursuer_6/loss=1.312, pursuer_7/loss=1.469, rew=517.80]                           


Best Saved Rew 2547
Epoch #321: test_reward: 483.994083 ± 175.135511, best_reward: 483.994083 ± 175.135511 in #321


Epoch #322: 20001it [01:05, 306.19it/s, env_step=6440000, len=1640, n/ep=0, n/st=400, pursuer_0/loss=1.097, pursuer_1/loss=1.175, pursuer_2/loss=1.228, pursuer_3/loss=1.438, pursuer_4/loss=1.245, pursuer_5/loss=1.398, pursuer_6/loss=1.306, pursuer_7/loss=1.441, rew=475.43]                           


Epoch #322: test_reward: 334.349750 ± 189.605846, best_reward: 483.994083 ± 175.135511 in #321


Epoch #323: 20001it [01:01, 324.57it/s, env_step=6460000, len=880, n/ep=0, n/st=400, pursuer_0/loss=1.161, pursuer_1/loss=1.232, pursuer_2/loss=1.284, pursuer_3/loss=1.423, pursuer_4/loss=1.179, pursuer_5/loss=1.351, pursuer_6/loss=1.432, pursuer_7/loss=1.308, rew=560.98]                            


Epoch #323: test_reward: 432.135583 ± 137.054647, best_reward: 483.994083 ± 175.135511 in #321


Epoch #324: 20001it [01:03, 314.64it/s, env_step=6480000, len=1048, n/ep=1, n/st=400, pursuer_0/loss=1.246, pursuer_1/loss=1.305, pursuer_2/loss=1.474, pursuer_3/loss=1.625, pursuer_4/loss=1.271, pursuer_5/loss=1.376, pursuer_6/loss=1.526, pursuer_7/loss=1.443, rew=484.64]                           


Epoch #324: test_reward: 361.329250 ± 177.405027, best_reward: 483.994083 ± 175.135511 in #321


Epoch #325: 20001it [01:03, 313.04it/s, env_step=6500000, len=1728, n/ep=1, n/st=400, pursuer_0/loss=1.244, pursuer_1/loss=1.286, pursuer_2/loss=1.371, pursuer_3/loss=1.401, pursuer_4/loss=1.265, pursuer_5/loss=1.362, pursuer_6/loss=1.386, pursuer_7/loss=1.462, rew=371.56]                           


Epoch #325: test_reward: 352.336583 ± 158.867332, best_reward: 483.994083 ± 175.135511 in #321


Epoch #326: 20001it [01:04, 308.42it/s, env_step=6520000, len=1424, n/ep=1, n/st=400, pursuer_0/loss=1.216, pursuer_1/loss=1.377, pursuer_2/loss=1.392, pursuer_3/loss=1.437, pursuer_4/loss=1.319, pursuer_5/loss=1.366, pursuer_6/loss=1.439, pursuer_7/loss=1.547, rew=321.35]                           


Epoch #326: test_reward: 351.944958 ± 198.413562, best_reward: 483.994083 ± 175.135511 in #321


Epoch #327: 20001it [01:02, 320.55it/s, env_step=6540000, len=1616, n/ep=0, n/st=400, pursuer_0/loss=1.336, pursuer_1/loss=1.344, pursuer_2/loss=1.368, pursuer_3/loss=1.607, pursuer_4/loss=1.445, pursuer_5/loss=1.465, pursuer_6/loss=1.400, pursuer_7/loss=1.528, rew=609.92]                           


Epoch #327: test_reward: 446.356000 ± 165.542031, best_reward: 483.994083 ± 175.135511 in #321


Epoch #328: 20001it [01:05, 307.55it/s, env_step=6560000, len=1584, n/ep=0, n/st=400, pursuer_0/loss=1.334, pursuer_1/loss=1.524, pursuer_2/loss=1.474, pursuer_3/loss=1.485, pursuer_4/loss=1.514, pursuer_5/loss=1.566, pursuer_6/loss=1.518, pursuer_7/loss=1.639, rew=451.52]                           


Epoch #328: test_reward: 387.029875 ± 178.911308, best_reward: 483.994083 ± 175.135511 in #321


Epoch #329: 20001it [01:03, 314.45it/s, env_step=6580000, len=3848, n/ep=0, n/st=400, pursuer_0/loss=1.288, pursuer_1/loss=1.292, pursuer_2/loss=1.413, pursuer_3/loss=1.546, pursuer_4/loss=1.337, pursuer_5/loss=1.371, pursuer_6/loss=1.523, pursuer_7/loss=1.518, rew=231.66]                           


Epoch #329: test_reward: 352.779500 ± 215.291948, best_reward: 483.994083 ± 175.135511 in #321


Epoch #330: 20001it [01:04, 310.65it/s, env_step=6600000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.242, pursuer_1/loss=1.280, pursuer_2/loss=1.451, pursuer_3/loss=1.457, pursuer_4/loss=1.366, pursuer_5/loss=1.346, pursuer_6/loss=1.426, pursuer_7/loss=1.485, rew=150.46]                           


Epoch #330: test_reward: 413.754583 ± 164.819135, best_reward: 483.994083 ± 175.135511 in #321


Epoch #331: 20001it [01:04, 308.23it/s, env_step=6620000, len=1464, n/ep=0, n/st=400, pursuer_0/loss=1.344, pursuer_1/loss=1.398, pursuer_2/loss=1.580, pursuer_3/loss=1.517, pursuer_4/loss=1.435, pursuer_5/loss=1.480, pursuer_6/loss=1.672, pursuer_7/loss=1.678, rew=478.00]                           


Epoch #331: test_reward: 383.348458 ± 205.541365, best_reward: 483.994083 ± 175.135511 in #321


Epoch #332: 20001it [01:03, 315.74it/s, env_step=6640000, len=3328, n/ep=0, n/st=400, pursuer_0/loss=1.404, pursuer_1/loss=1.326, pursuer_2/loss=1.521, pursuer_3/loss=1.569, pursuer_4/loss=1.456, pursuer_5/loss=1.486, pursuer_6/loss=1.518, pursuer_7/loss=1.664, rew=181.61]                           


Epoch #332: test_reward: 308.453833 ± 182.737144, best_reward: 483.994083 ± 175.135511 in #321


Epoch #333: 20001it [01:04, 309.03it/s, env_step=6660000, len=864, n/ep=0, n/st=400, pursuer_0/loss=1.397, pursuer_1/loss=1.300, pursuer_2/loss=1.491, pursuer_3/loss=1.540, pursuer_4/loss=1.457, pursuer_5/loss=1.354, pursuer_6/loss=1.511, pursuer_7/loss=1.671, rew=757.90]                            


Epoch #333: test_reward: 390.035208 ± 168.816706, best_reward: 483.994083 ± 175.135511 in #321


Epoch #334: 20001it [01:04, 311.80it/s, env_step=6680000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.433, pursuer_1/loss=1.534, pursuer_2/loss=1.566, pursuer_3/loss=1.634, pursuer_4/loss=1.391, pursuer_5/loss=1.542, pursuer_6/loss=1.684, pursuer_7/loss=1.586, rew=184.89]                           


Epoch #334: test_reward: 384.333417 ± 209.442587, best_reward: 483.994083 ± 175.135511 in #321


Epoch #335: 20001it [01:02, 318.21it/s, env_step=6700000, len=1344, n/ep=0, n/st=400, pursuer_0/loss=1.399, pursuer_1/loss=1.359, pursuer_2/loss=1.540, pursuer_3/loss=1.604, pursuer_4/loss=1.571, pursuer_5/loss=1.292, pursuer_6/loss=1.385, pursuer_7/loss=1.619, rew=520.14]                           


Epoch #335: test_reward: 313.481875 ± 236.386539, best_reward: 483.994083 ± 175.135511 in #321


Epoch #336: 20001it [01:05, 303.87it/s, env_step=6720000, len=2304, n/ep=0, n/st=400, pursuer_0/loss=1.368, pursuer_1/loss=1.305, pursuer_2/loss=1.488, pursuer_3/loss=1.590, pursuer_4/loss=1.520, pursuer_5/loss=1.462, pursuer_6/loss=1.619, pursuer_7/loss=1.545, rew=335.14]                           


Epoch #336: test_reward: 414.146917 ± 165.234810, best_reward: 483.994083 ± 175.135511 in #321


Epoch #337: 20001it [01:05, 307.22it/s, env_step=6740000, len=2312, n/ep=0, n/st=400, pursuer_0/loss=1.459, pursuer_1/loss=1.370, pursuer_2/loss=1.396, pursuer_3/loss=1.720, pursuer_4/loss=1.505, pursuer_5/loss=1.537, pursuer_6/loss=1.514, pursuer_7/loss=1.642, rew=549.36]                           


Epoch #337: test_reward: 357.041625 ± 160.133990, best_reward: 483.994083 ± 175.135511 in #321


Epoch #338: 20001it [01:05, 306.24it/s, env_step=6760000, len=3040, n/ep=1, n/st=400, pursuer_0/loss=1.338, pursuer_1/loss=1.311, pursuer_2/loss=1.467, pursuer_3/loss=1.450, pursuer_4/loss=1.373, pursuer_5/loss=1.405, pursuer_6/loss=1.378, pursuer_7/loss=1.553, rew=330.48]                           


Epoch #338: test_reward: 441.960292 ± 184.305770, best_reward: 483.994083 ± 175.135511 in #321


Epoch #339: 20001it [01:04, 312.27it/s, env_step=6780000, len=2128, n/ep=0, n/st=400, pursuer_0/loss=1.327, pursuer_1/loss=1.383, pursuer_2/loss=1.270, pursuer_3/loss=1.589, pursuer_4/loss=1.424, pursuer_5/loss=1.342, pursuer_6/loss=1.424, pursuer_7/loss=1.606, rew=462.00]                           


Epoch #339: test_reward: 301.045167 ± 208.466716, best_reward: 483.994083 ± 175.135511 in #321


Epoch #340: 20001it [01:05, 306.31it/s, env_step=6800000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.267, pursuer_1/loss=1.220, pursuer_2/loss=1.567, pursuer_3/loss=1.447, pursuer_4/loss=1.430, pursuer_5/loss=1.343, pursuer_6/loss=1.599, pursuer_7/loss=1.583, rew=388.54]                           


Epoch #340: test_reward: 376.819458 ± 170.071083, best_reward: 483.994083 ± 175.135511 in #321


Epoch #341: 20001it [01:05, 303.48it/s, env_step=6820000, len=2816, n/ep=0, n/st=400, pursuer_0/loss=1.211, pursuer_1/loss=1.516, pursuer_2/loss=1.438, pursuer_3/loss=1.535, pursuer_4/loss=1.348, pursuer_5/loss=1.383, pursuer_6/loss=1.574, pursuer_7/loss=1.504, rew=369.93]                           


Epoch #341: test_reward: 364.109500 ± 202.223589, best_reward: 483.994083 ± 175.135511 in #321


Epoch #342: 20001it [01:06, 302.24it/s, env_step=6840000, len=3216, n/ep=0, n/st=400, pursuer_0/loss=1.218, pursuer_1/loss=1.219, pursuer_2/loss=1.364, pursuer_3/loss=1.542, pursuer_4/loss=1.182, pursuer_5/loss=1.338, pursuer_6/loss=1.446, pursuer_7/loss=1.351, rew=287.54]                           


Epoch #342: test_reward: 406.639667 ± 223.757611, best_reward: 483.994083 ± 175.135511 in #321


Epoch #343: 20001it [01:06, 300.10it/s, env_step=6860000, len=2000, n/ep=0, n/st=400, pursuer_0/loss=1.278, pursuer_1/loss=1.468, pursuer_2/loss=1.343, pursuer_3/loss=1.510, pursuer_4/loss=1.334, pursuer_5/loss=1.275, pursuer_6/loss=1.252, pursuer_7/loss=1.465, rew=254.93]                           


Epoch #343: test_reward: 336.464375 ± 184.457646, best_reward: 483.994083 ± 175.135511 in #321


Epoch #344: 20001it [01:03, 316.41it/s, env_step=6880000, len=2784, n/ep=0, n/st=400, pursuer_0/loss=1.231, pursuer_1/loss=1.404, pursuer_2/loss=1.249, pursuer_3/loss=1.414, pursuer_4/loss=1.378, pursuer_5/loss=1.295, pursuer_6/loss=1.548, pursuer_7/loss=1.584, rew=235.69]                           


Epoch #344: test_reward: 411.182458 ± 189.829369, best_reward: 483.994083 ± 175.135511 in #321


Epoch #345: 20001it [01:02, 318.12it/s, env_step=6900000, len=1608, n/ep=0, n/st=400, pursuer_0/loss=1.253, pursuer_1/loss=1.348, pursuer_2/loss=1.333, pursuer_3/loss=1.564, pursuer_4/loss=1.255, pursuer_5/loss=1.338, pursuer_6/loss=1.500, pursuer_7/loss=1.522, rew=386.04]                           


Epoch #345: test_reward: 371.593500 ± 148.745501, best_reward: 483.994083 ± 175.135511 in #321


Epoch #346: 20001it [01:01, 324.75it/s, env_step=6920000, len=2040, n/ep=2, n/st=400, pursuer_0/loss=1.311, pursuer_1/loss=1.404, pursuer_2/loss=1.375, pursuer_3/loss=1.456, pursuer_4/loss=1.208, pursuer_5/loss=1.225, pursuer_6/loss=1.444, pursuer_7/loss=1.520, rew=316.02]                           


Epoch #346: test_reward: 348.150542 ± 178.920231, best_reward: 483.994083 ± 175.135511 in #321


Epoch #347: 20001it [01:01, 324.11it/s, env_step=6940000, len=1864, n/ep=0, n/st=400, pursuer_0/loss=1.217, pursuer_1/loss=1.413, pursuer_2/loss=1.454, pursuer_3/loss=1.374, pursuer_4/loss=1.280, pursuer_5/loss=1.486, pursuer_6/loss=1.468, pursuer_7/loss=1.557, rew=378.00]                           


Epoch #347: test_reward: 333.541042 ± 155.239887, best_reward: 483.994083 ± 175.135511 in #321


Epoch #348: 20001it [01:02, 319.94it/s, env_step=6960000, len=1416, n/ep=0, n/st=400, pursuer_0/loss=1.217, pursuer_1/loss=1.318, pursuer_2/loss=1.436, pursuer_3/loss=1.489, pursuer_4/loss=1.330, pursuer_5/loss=1.291, pursuer_6/loss=1.351, pursuer_7/loss=1.552, rew=453.11]                           


Epoch #348: test_reward: 367.209417 ± 212.701586, best_reward: 483.994083 ± 175.135511 in #321


Epoch #349: 20001it [01:02, 320.16it/s, env_step=6980000, len=1712, n/ep=0, n/st=400, pursuer_0/loss=1.125, pursuer_1/loss=1.426, pursuer_2/loss=1.410, pursuer_3/loss=1.498, pursuer_4/loss=1.271, pursuer_5/loss=1.459, pursuer_6/loss=1.574, pursuer_7/loss=1.528, rew=547.65]                           


Epoch #349: test_reward: 298.109125 ± 196.942615, best_reward: 483.994083 ± 175.135511 in #321


Epoch #350: 20001it [01:04, 311.11it/s, env_step=7000000, len=1176, n/ep=0, n/st=400, pursuer_0/loss=1.338, pursuer_1/loss=1.458, pursuer_2/loss=1.503, pursuer_3/loss=1.743, pursuer_4/loss=1.376, pursuer_5/loss=1.475, pursuer_6/loss=1.509, pursuer_7/loss=1.721, rew=456.57]                           


Epoch #350: test_reward: 381.002875 ± 192.189981, best_reward: 483.994083 ± 175.135511 in #321


Epoch #351: 20001it [01:03, 316.00it/s, env_step=7020000, len=912, n/ep=0, n/st=400, pursuer_0/loss=1.354, pursuer_1/loss=1.424, pursuer_2/loss=1.425, pursuer_3/loss=1.554, pursuer_4/loss=1.361, pursuer_5/loss=1.380, pursuer_6/loss=1.481, pursuer_7/loss=1.628, rew=572.93]                            


Epoch #351: test_reward: 324.684917 ± 164.433474, best_reward: 483.994083 ± 175.135511 in #321


Epoch #352: 20001it [01:05, 303.36it/s, env_step=7040000, len=952, n/ep=1, n/st=400, pursuer_0/loss=1.394, pursuer_1/loss=1.587, pursuer_2/loss=1.592, pursuer_3/loss=1.788, pursuer_4/loss=1.373, pursuer_5/loss=1.497, pursuer_6/loss=1.608, pursuer_7/loss=1.734, rew=599.45]                            


Epoch #352: test_reward: 361.424542 ± 171.780784, best_reward: 483.994083 ± 175.135511 in #321


Epoch #353: 20001it [01:02, 318.40it/s, env_step=7060000, len=2728, n/ep=0, n/st=400, pursuer_0/loss=1.228, pursuer_1/loss=1.466, pursuer_2/loss=1.601, pursuer_3/loss=1.679, pursuer_4/loss=1.384, pursuer_5/loss=1.470, pursuer_6/loss=1.662, pursuer_7/loss=1.678, rew=351.46]                           


Epoch #353: test_reward: 366.790875 ± 201.070588, best_reward: 483.994083 ± 175.135511 in #321


Epoch #354: 20001it [01:00, 332.04it/s, env_step=7080000, len=3920, n/ep=2, n/st=400, pursuer_0/loss=1.408, pursuer_1/loss=1.506, pursuer_2/loss=1.655, pursuer_3/loss=1.815, pursuer_4/loss=1.552, pursuer_5/loss=1.505, pursuer_6/loss=1.598, pursuer_7/loss=1.742, rew=117.98]                           


Epoch #354: test_reward: 281.247583 ± 226.097925, best_reward: 483.994083 ± 175.135511 in #321


Epoch #355: 20001it [01:01, 327.18it/s, env_step=7100000, len=1344, n/ep=0, n/st=400, pursuer_0/loss=1.410, pursuer_1/loss=1.603, pursuer_2/loss=1.689, pursuer_3/loss=1.812, pursuer_4/loss=1.538, pursuer_5/loss=1.547, pursuer_6/loss=1.668, pursuer_7/loss=1.862, rew=614.79]                           


Epoch #355: test_reward: 445.118000 ± 187.271935, best_reward: 483.994083 ± 175.135511 in #321


Epoch #356: 20001it [00:59, 334.00it/s, env_step=7120000, len=3992, n/ep=1, n/st=400, pursuer_0/loss=1.373, pursuer_1/loss=1.453, pursuer_2/loss=1.623, pursuer_3/loss=1.685, pursuer_4/loss=1.562, pursuer_5/loss=1.555, pursuer_6/loss=1.690, pursuer_7/loss=1.716, rew=354.48]                           


Epoch #356: test_reward: 382.549167 ± 175.662602, best_reward: 483.994083 ± 175.135511 in #321


Epoch #357: 20001it [01:00, 332.87it/s, env_step=7140000, len=1160, n/ep=0, n/st=400, pursuer_0/loss=1.497, pursuer_1/loss=1.537, pursuer_2/loss=1.552, pursuer_3/loss=1.761, pursuer_4/loss=1.525, pursuer_5/loss=1.552, pursuer_6/loss=1.761, pursuer_7/loss=1.686, rew=474.44]                           


Epoch #357: test_reward: 289.724542 ± 185.699068, best_reward: 483.994083 ± 175.135511 in #321


Epoch #358: 20001it [01:04, 310.45it/s, env_step=7160000, len=1000, n/ep=0, n/st=400, pursuer_0/loss=1.449, pursuer_1/loss=1.419, pursuer_2/loss=1.713, pursuer_3/loss=1.754, pursuer_4/loss=1.501, pursuer_5/loss=1.507, pursuer_6/loss=1.681, pursuer_7/loss=1.760, rew=514.31]                           


Epoch #358: test_reward: 306.924833 ± 214.791665, best_reward: 483.994083 ± 175.135511 in #321


Epoch #359: 20001it [01:00, 330.42it/s, env_step=7180000, len=1360, n/ep=0, n/st=400, pursuer_0/loss=1.557, pursuer_1/loss=1.525, pursuer_2/loss=1.736, pursuer_3/loss=1.702, pursuer_4/loss=1.595, pursuer_5/loss=1.662, pursuer_6/loss=1.637, pursuer_7/loss=1.758, rew=357.00]                           


Epoch #359: test_reward: 394.538875 ± 203.179269, best_reward: 483.994083 ± 175.135511 in #321


Epoch #360: 20001it [01:03, 312.69it/s, env_step=7200000, len=3192, n/ep=0, n/st=400, pursuer_0/loss=1.273, pursuer_1/loss=1.577, pursuer_2/loss=1.679, pursuer_3/loss=1.792, pursuer_4/loss=1.513, pursuer_5/loss=1.545, pursuer_6/loss=1.675, pursuer_7/loss=1.699, rew=230.56]                           


Epoch #360: test_reward: 375.433542 ± 181.331886, best_reward: 483.994083 ± 175.135511 in #321


Epoch #361: 20001it [01:04, 307.89it/s, env_step=7220000, len=1192, n/ep=0, n/st=400, pursuer_0/loss=1.447, pursuer_1/loss=1.538, pursuer_2/loss=1.629, pursuer_3/loss=1.887, pursuer_4/loss=1.457, pursuer_5/loss=1.534, pursuer_6/loss=1.637, pursuer_7/loss=1.700, rew=490.14]                           


Epoch #361: test_reward: 359.474500 ± 215.258845, best_reward: 483.994083 ± 175.135511 in #321


Epoch #362: 20001it [01:00, 330.43it/s, env_step=7240000, len=1592, n/ep=1, n/st=400, pursuer_0/loss=1.351, pursuer_1/loss=1.491, pursuer_2/loss=1.491, pursuer_3/loss=1.613, pursuer_4/loss=1.470, pursuer_5/loss=1.611, pursuer_6/loss=1.574, pursuer_7/loss=1.681, rew=475.02]                           


Epoch #362: test_reward: 301.876958 ± 171.121089, best_reward: 483.994083 ± 175.135511 in #321


Epoch #363: 20001it [01:01, 327.86it/s, env_step=7260000, len=1608, n/ep=0, n/st=400, pursuer_0/loss=1.402, pursuer_1/loss=1.534, pursuer_2/loss=1.670, pursuer_3/loss=1.590, pursuer_4/loss=1.500, pursuer_5/loss=1.532, pursuer_6/loss=1.624, pursuer_7/loss=1.684, rew=639.30]                           


Epoch #363: test_reward: 399.461542 ± 176.101105, best_reward: 483.994083 ± 175.135511 in #321


Epoch #364: 20001it [01:00, 328.31it/s, env_step=7280000, len=2440, n/ep=0, n/st=400, pursuer_0/loss=1.526, pursuer_1/loss=1.608, pursuer_2/loss=1.734, pursuer_3/loss=1.680, pursuer_4/loss=1.652, pursuer_5/loss=1.571, pursuer_6/loss=1.471, pursuer_7/loss=1.778, rew=390.94]                           


Epoch #364: test_reward: 380.244750 ± 177.229236, best_reward: 483.994083 ± 175.135511 in #321


Epoch #365: 20001it [01:00, 329.92it/s, env_step=7300000, len=3576, n/ep=0, n/st=400, pursuer_0/loss=1.397, pursuer_1/loss=1.455, pursuer_2/loss=1.494, pursuer_3/loss=1.604, pursuer_4/loss=1.625, pursuer_5/loss=1.491, pursuer_6/loss=1.471, pursuer_7/loss=1.662, rew=407.34]                           


Epoch #365: test_reward: 260.821375 ± 179.862786, best_reward: 483.994083 ± 175.135511 in #321


Epoch #366: 20001it [00:57, 346.76it/s, env_step=7320000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.313, pursuer_1/loss=1.407, pursuer_2/loss=1.535, pursuer_3/loss=1.529, pursuer_4/loss=1.324, pursuer_5/loss=1.419, pursuer_6/loss=1.519, pursuer_7/loss=1.573, rew=113.31]                           


Epoch #366: test_reward: 330.314500 ± 182.016602, best_reward: 483.994083 ± 175.135511 in #321


Epoch #367: 20001it [00:59, 335.95it/s, env_step=7340000, len=2292, n/ep=0, n/st=400, pursuer_0/loss=1.388, pursuer_1/loss=1.346, pursuer_2/loss=1.384, pursuer_3/loss=1.664, pursuer_4/loss=1.353, pursuer_5/loss=1.373, pursuer_6/loss=1.451, pursuer_7/loss=1.528, rew=497.20]                           


Epoch #367: test_reward: 423.367083 ± 187.657671, best_reward: 483.994083 ± 175.135511 in #321


Epoch #368: 20001it [00:58, 339.18it/s, env_step=7360000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.328, pursuer_1/loss=1.486, pursuer_2/loss=1.542, pursuer_3/loss=1.498, pursuer_4/loss=1.362, pursuer_5/loss=1.440, pursuer_6/loss=1.438, pursuer_7/loss=1.610, rew=313.22]                           


Epoch #368: test_reward: 298.975708 ± 174.424830, best_reward: 483.994083 ± 175.135511 in #321


Epoch #369: 20001it [01:00, 331.94it/s, env_step=7380000, len=1664, n/ep=0, n/st=400, pursuer_0/loss=1.397, pursuer_1/loss=1.495, pursuer_2/loss=1.695, pursuer_3/loss=1.616, pursuer_4/loss=1.478, pursuer_5/loss=1.399, pursuer_6/loss=1.524, pursuer_7/loss=1.719, rew=387.81]                           


Epoch #369: test_reward: 282.902375 ± 211.244499, best_reward: 483.994083 ± 175.135511 in #321


Epoch #370: 20001it [00:58, 342.11it/s, env_step=7400000, len=2832, n/ep=0, n/st=400, pursuer_0/loss=1.294, pursuer_1/loss=1.339, pursuer_2/loss=1.484, pursuer_3/loss=1.463, pursuer_4/loss=1.397, pursuer_5/loss=1.500, pursuer_6/loss=1.357, pursuer_7/loss=1.487, rew=250.58]                           


Epoch #370: test_reward: 279.751625 ± 203.772167, best_reward: 483.994083 ± 175.135511 in #321


Epoch #371: 20001it [00:59, 336.22it/s, env_step=7420000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.325, pursuer_1/loss=1.352, pursuer_2/loss=1.382, pursuer_3/loss=1.562, pursuer_4/loss=1.316, pursuer_5/loss=1.353, pursuer_6/loss=1.506, pursuer_7/loss=1.627, rew=208.47]                           


Epoch #371: test_reward: 321.558208 ± 168.217132, best_reward: 483.994083 ± 175.135511 in #321


Epoch #372: 20001it [01:00, 328.96it/s, env_step=7440000, len=2888, n/ep=0, n/st=400, pursuer_0/loss=1.230, pursuer_1/loss=1.503, pursuer_2/loss=1.499, pursuer_3/loss=1.492, pursuer_4/loss=1.271, pursuer_5/loss=1.399, pursuer_6/loss=1.578, pursuer_7/loss=1.533, rew=261.81]                           


Steps Policy Saved  3030
Epoch #372: test_reward: 371.001958 ± 246.558995, best_reward: 483.994083 ± 175.135511 in #321


Epoch #373: 20001it [00:59, 337.31it/s, env_step=7460000, len=1040, n/ep=0, n/st=400, pursuer_0/loss=1.314, pursuer_1/loss=1.454, pursuer_2/loss=1.419, pursuer_3/loss=1.594, pursuer_4/loss=1.470, pursuer_5/loss=1.417, pursuer_6/loss=1.342, pursuer_7/loss=1.599, rew=378.78]                           


Epoch #373: test_reward: 223.129292 ± 142.116466, best_reward: 483.994083 ± 175.135511 in #321


Epoch #374: 20001it [00:56, 352.48it/s, env_step=7480000, len=728, n/ep=0, n/st=400, pursuer_0/loss=1.396, pursuer_1/loss=1.488, pursuer_2/loss=1.552, pursuer_3/loss=1.586, pursuer_4/loss=1.509, pursuer_5/loss=1.646, pursuer_6/loss=1.506, pursuer_7/loss=1.610, rew=534.96]                            


Steps Policy Saved  3050
Epoch #374: test_reward: 261.875208 ± 180.419066, best_reward: 483.994083 ± 175.135511 in #321


Epoch #375: 20001it [00:57, 347.37it/s, env_step=7500000, len=1080, n/ep=1, n/st=400, pursuer_0/loss=1.215, pursuer_1/loss=1.291, pursuer_2/loss=1.408, pursuer_3/loss=1.537, pursuer_4/loss=1.390, pursuer_5/loss=1.345, pursuer_6/loss=1.355, pursuer_7/loss=1.670, rew=651.07]                           


Epoch #375: test_reward: 407.251667 ± 146.230570, best_reward: 483.994083 ± 175.135511 in #321


Epoch #376: 20001it [00:54, 365.67it/s, env_step=7520000, len=1016, n/ep=0, n/st=400, pursuer_0/loss=1.301, pursuer_1/loss=1.238, pursuer_2/loss=1.484, pursuer_3/loss=1.627, pursuer_4/loss=1.354, pursuer_5/loss=1.371, pursuer_6/loss=1.506, pursuer_7/loss=1.695, rew=616.75]                           


Epoch #376: test_reward: 326.460167 ± 202.677806, best_reward: 483.994083 ± 175.135511 in #321


Epoch #377: 20001it [00:57, 346.30it/s, env_step=7540000, len=1016, n/ep=0, n/st=400, pursuer_0/loss=1.299, pursuer_1/loss=1.362, pursuer_2/loss=1.686, pursuer_3/loss=1.547, pursuer_4/loss=1.366, pursuer_5/loss=1.431, pursuer_6/loss=1.401, pursuer_7/loss=1.456, rew=406.88]                           


Epoch #377: test_reward: 271.917875 ± 208.256283, best_reward: 483.994083 ± 175.135511 in #321


Epoch #378: 20001it [00:57, 344.99it/s, env_step=7560000, len=1032, n/ep=0, n/st=400, pursuer_0/loss=1.640, pursuer_1/loss=1.474, pursuer_2/loss=1.468, pursuer_3/loss=1.677, pursuer_4/loss=1.370, pursuer_5/loss=1.419, pursuer_6/loss=1.785, pursuer_7/loss=1.576, rew=485.83]                           


Epoch #378: test_reward: 334.045167 ± 211.984345, best_reward: 483.994083 ± 175.135511 in #321


Epoch #379: 20001it [00:58, 342.26it/s, env_step=7580000, len=1632, n/ep=0, n/st=400, pursuer_0/loss=1.443, pursuer_1/loss=1.692, pursuer_2/loss=1.577, pursuer_3/loss=1.687, pursuer_4/loss=1.584, pursuer_5/loss=1.504, pursuer_6/loss=1.736, pursuer_7/loss=1.856, rew=625.13]                           


Epoch #379: test_reward: 340.687167 ± 172.679776, best_reward: 483.994083 ± 175.135511 in #321


Epoch #380: 20001it [00:55, 361.37it/s, env_step=7600000, len=3492, n/ep=0, n/st=400, pursuer_0/loss=1.295, pursuer_1/loss=1.374, pursuer_2/loss=1.393, pursuer_3/loss=1.533, pursuer_4/loss=1.357, pursuer_5/loss=1.428, pursuer_6/loss=1.460, pursuer_7/loss=1.509, rew=342.83]                           


Epoch #380: test_reward: 402.517917 ± 188.911281, best_reward: 483.994083 ± 175.135511 in #321


Epoch #381: 20001it [00:56, 355.33it/s, env_step=7620000, len=1128, n/ep=1, n/st=400, pursuer_0/loss=1.415, pursuer_1/loss=1.574, pursuer_2/loss=1.569, pursuer_3/loss=1.519, pursuer_4/loss=1.467, pursuer_5/loss=1.437, pursuer_6/loss=1.594, pursuer_7/loss=1.673, rew=758.09]                           


Best Saved Rew 3125
Epoch #381: test_reward: 504.651417 ± 150.050255, best_reward: 504.651417 ± 150.050255 in #381


Epoch #382: 20001it [00:56, 356.72it/s, env_step=7640000, len=1344, n/ep=1, n/st=400, pursuer_0/loss=1.261, pursuer_1/loss=1.416, pursuer_2/loss=1.545, pursuer_3/loss=1.624, pursuer_4/loss=1.369, pursuer_5/loss=1.567, pursuer_6/loss=1.487, pursuer_7/loss=1.492, rew=339.74]                           


Epoch #382: test_reward: 392.363625 ± 136.029758, best_reward: 504.651417 ± 150.050255 in #381


Epoch #383: 20001it [00:57, 345.79it/s, env_step=7660000, len=1952, n/ep=0, n/st=400, pursuer_0/loss=1.447, pursuer_1/loss=1.522, pursuer_2/loss=1.697, pursuer_3/loss=1.762, pursuer_4/loss=1.615, pursuer_5/loss=1.659, pursuer_6/loss=1.514, pursuer_7/loss=1.651, rew=554.86]                           


Epoch #383: test_reward: 311.926125 ± 212.847179, best_reward: 504.651417 ± 150.050255 in #381


Epoch #384: 20001it [00:58, 341.07it/s, env_step=7680000, len=1472, n/ep=0, n/st=400, pursuer_0/loss=1.399, pursuer_1/loss=1.513, pursuer_2/loss=1.497, pursuer_3/loss=1.603, pursuer_4/loss=1.472, pursuer_5/loss=1.506, pursuer_6/loss=1.578, pursuer_7/loss=1.501, rew=443.23]                           


Epoch #384: test_reward: 412.339250 ± 188.842580, best_reward: 504.651417 ± 150.050255 in #381


Epoch #385: 20001it [00:56, 355.33it/s, env_step=7700000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.438, pursuer_1/loss=1.409, pursuer_2/loss=1.646, pursuer_3/loss=1.519, pursuer_4/loss=1.339, pursuer_5/loss=1.480, pursuer_6/loss=1.549, pursuer_7/loss=1.543, rew=355.20]                           


Epoch #385: test_reward: 369.781417 ± 184.541229, best_reward: 504.651417 ± 150.050255 in #381


Epoch #386: 20001it [00:56, 357.05it/s, env_step=7720000, len=3504, n/ep=0, n/st=400, pursuer_0/loss=1.490, pursuer_1/loss=1.487, pursuer_2/loss=1.602, pursuer_3/loss=1.752, pursuer_4/loss=1.530, pursuer_5/loss=1.453, pursuer_6/loss=1.700, pursuer_7/loss=1.698, rew=286.16]                           


Epoch #386: test_reward: 322.473083 ± 216.450862, best_reward: 504.651417 ± 150.050255 in #381


Epoch #387: 20001it [00:57, 347.98it/s, env_step=7740000, len=1416, n/ep=1, n/st=400, pursuer_0/loss=1.470, pursuer_1/loss=1.624, pursuer_2/loss=1.656, pursuer_3/loss=1.640, pursuer_4/loss=1.613, pursuer_5/loss=1.636, pursuer_6/loss=1.621, pursuer_7/loss=1.680, rew=412.32]                           


Epoch #387: test_reward: 254.457333 ± 177.407481, best_reward: 504.651417 ± 150.050255 in #381


Epoch #388: 20001it [00:59, 334.79it/s, env_step=7760000, len=2400, n/ep=1, n/st=400, pursuer_0/loss=1.611, pursuer_1/loss=1.531, pursuer_2/loss=1.704, pursuer_3/loss=1.581, pursuer_4/loss=1.438, pursuer_5/loss=1.555, pursuer_6/loss=1.602, pursuer_7/loss=1.611, rew=293.24]                           


Epoch #388: test_reward: 477.386083 ± 175.366511, best_reward: 504.651417 ± 150.050255 in #381


Epoch #389: 20001it [00:59, 334.70it/s, env_step=7780000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.304, pursuer_1/loss=1.330, pursuer_2/loss=1.604, pursuer_3/loss=1.587, pursuer_4/loss=1.450, pursuer_5/loss=1.490, pursuer_6/loss=1.535, pursuer_7/loss=1.608, rew=220.37]                           


Epoch #389: test_reward: 395.467708 ± 228.354643, best_reward: 504.651417 ± 150.050255 in #381


Epoch #390: 20001it [00:59, 334.06it/s, env_step=7800000, len=2944, n/ep=0, n/st=400, pursuer_0/loss=1.286, pursuer_1/loss=1.341, pursuer_2/loss=1.449, pursuer_3/loss=1.680, pursuer_4/loss=1.418, pursuer_5/loss=1.500, pursuer_6/loss=1.618, pursuer_7/loss=1.684, rew=347.93]                           


Epoch #390: test_reward: 316.947333 ± 180.364607, best_reward: 504.651417 ± 150.050255 in #381


Epoch #391: 20001it [01:01, 324.04it/s, env_step=7820000, len=1884, n/ep=2, n/st=400, pursuer_0/loss=1.410, pursuer_1/loss=1.375, pursuer_2/loss=1.578, pursuer_3/loss=1.585, pursuer_4/loss=1.494, pursuer_5/loss=1.531, pursuer_6/loss=1.603, pursuer_7/loss=1.604, rew=464.05]                           


Epoch #391: test_reward: 275.801917 ± 166.571406, best_reward: 504.651417 ± 150.050255 in #381


Epoch #392: 20001it [01:02, 321.88it/s, env_step=7840000, len=848, n/ep=0, n/st=400, pursuer_0/loss=1.308, pursuer_1/loss=1.332, pursuer_2/loss=1.576, pursuer_3/loss=1.734, pursuer_4/loss=1.388, pursuer_5/loss=1.489, pursuer_6/loss=1.494, pursuer_7/loss=1.587, rew=393.29]                            


Epoch #392: test_reward: 323.452667 ± 198.008528, best_reward: 504.651417 ± 150.050255 in #381


Epoch #393: 20001it [01:03, 316.12it/s, env_step=7860000, len=1152, n/ep=0, n/st=400, pursuer_0/loss=1.449, pursuer_1/loss=1.602, pursuer_2/loss=1.631, pursuer_3/loss=1.649, pursuer_4/loss=1.392, pursuer_5/loss=1.672, pursuer_6/loss=1.657, pursuer_7/loss=1.704, rew=660.77]                           


Steps Policy Saved  3250
Epoch #393: test_reward: 385.184750 ± 146.100863, best_reward: 504.651417 ± 150.050255 in #381


Epoch #394: 20001it [01:01, 325.90it/s, env_step=7880000, len=2632, n/ep=0, n/st=400, pursuer_0/loss=1.240, pursuer_1/loss=1.404, pursuer_2/loss=1.514, pursuer_3/loss=1.519, pursuer_4/loss=1.428, pursuer_5/loss=1.473, pursuer_6/loss=1.383, pursuer_7/loss=1.454, rew=287.75]                           


Epoch #394: test_reward: 417.172292 ± 202.397908, best_reward: 504.651417 ± 150.050255 in #381


Epoch #395: 20001it [01:02, 318.25it/s, env_step=7900000, len=3284, n/ep=0, n/st=400, pursuer_0/loss=1.424, pursuer_1/loss=1.467, pursuer_2/loss=1.725, pursuer_3/loss=1.849, pursuer_4/loss=1.496, pursuer_5/loss=1.426, pursuer_6/loss=1.562, pursuer_7/loss=1.557, rew=384.49]                           


Epoch #395: test_reward: 436.789417 ± 172.300341, best_reward: 504.651417 ± 150.050255 in #381


Epoch #396: 20001it [01:04, 308.45it/s, env_step=7920000, len=1736, n/ep=0, n/st=400, pursuer_0/loss=1.536, pursuer_1/loss=1.487, pursuer_2/loss=1.714, pursuer_3/loss=1.587, pursuer_4/loss=1.572, pursuer_5/loss=1.588, pursuer_6/loss=1.709, pursuer_7/loss=1.682, rew=315.13]                           


Epoch #396: test_reward: 387.901042 ± 232.626846, best_reward: 504.651417 ± 150.050255 in #381


Epoch #397: 20001it [01:00, 329.58it/s, env_step=7940000, len=1176, n/ep=0, n/st=400, pursuer_0/loss=1.473, pursuer_1/loss=1.613, pursuer_2/loss=1.586, pursuer_3/loss=1.779, pursuer_4/loss=1.657, pursuer_5/loss=1.587, pursuer_6/loss=1.806, pursuer_7/loss=1.583, rew=606.45]                           


Epoch #397: test_reward: 469.609417 ± 133.638797, best_reward: 504.651417 ± 150.050255 in #381


Epoch #398: 20001it [00:59, 336.15it/s, env_step=7960000, len=1288, n/ep=1, n/st=400, pursuer_0/loss=1.277, pursuer_1/loss=1.458, pursuer_2/loss=1.641, pursuer_3/loss=1.543, pursuer_4/loss=1.543, pursuer_5/loss=1.446, pursuer_6/loss=1.707, pursuer_7/loss=1.574, rew=374.35]                           


Epoch #398: test_reward: 412.075167 ± 207.079427, best_reward: 504.651417 ± 150.050255 in #381


Epoch #399: 20001it [01:01, 326.76it/s, env_step=7980000, len=872, n/ep=1, n/st=400, pursuer_0/loss=1.419, pursuer_1/loss=1.360, pursuer_2/loss=1.690, pursuer_3/loss=1.532, pursuer_4/loss=1.479, pursuer_5/loss=1.742, pursuer_6/loss=1.591, pursuer_7/loss=1.753, rew=570.75]                            


Epoch #399: test_reward: 477.326042 ± 170.339238, best_reward: 504.651417 ± 150.050255 in #381


Epoch #400: 20001it [01:00, 330.60it/s, env_step=8000000, len=3680, n/ep=0, n/st=400, pursuer_0/loss=1.381, pursuer_1/loss=1.563, pursuer_2/loss=1.623, pursuer_3/loss=1.746, pursuer_4/loss=1.557, pursuer_5/loss=1.564, pursuer_6/loss=1.645, pursuer_7/loss=1.575, rew=274.76]                           


Epoch #400: test_reward: 391.288667 ± 173.652403, best_reward: 504.651417 ± 150.050255 in #381


Epoch #401: 20001it [01:01, 327.54it/s, env_step=8020000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.544, pursuer_1/loss=1.530, pursuer_2/loss=1.625, pursuer_3/loss=1.773, pursuer_4/loss=1.532, pursuer_5/loss=1.722, pursuer_6/loss=1.724, pursuer_7/loss=1.819, rew=173.52]                           


Epoch #401: test_reward: 460.600375 ± 210.894487, best_reward: 504.651417 ± 150.050255 in #381


Epoch #402: 20001it [01:01, 323.18it/s, env_step=8040000, len=584, n/ep=0, n/st=400, pursuer_0/loss=1.578, pursuer_1/loss=1.598, pursuer_2/loss=1.667, pursuer_3/loss=1.760, pursuer_4/loss=1.620, pursuer_5/loss=1.803, pursuer_6/loss=1.841, pursuer_7/loss=1.710, rew=459.62]                            


Epoch #402: test_reward: 384.646083 ± 200.822866, best_reward: 504.651417 ± 150.050255 in #381


Epoch #403: 20001it [01:02, 320.09it/s, env_step=8060000, len=1160, n/ep=0, n/st=400, pursuer_0/loss=1.785, pursuer_1/loss=1.519, pursuer_2/loss=1.641, pursuer_3/loss=1.873, pursuer_4/loss=1.649, pursuer_5/loss=1.737, pursuer_6/loss=1.803, pursuer_7/loss=1.895, rew=576.76]                           


Epoch #403: test_reward: 460.602583 ± 181.776384, best_reward: 504.651417 ± 150.050255 in #381


Epoch #404: 20001it [01:02, 320.03it/s, env_step=8080000, len=1008, n/ep=0, n/st=400, pursuer_0/loss=1.490, pursuer_1/loss=1.700, pursuer_2/loss=1.741, pursuer_3/loss=1.845, pursuer_4/loss=1.706, pursuer_5/loss=1.652, pursuer_6/loss=1.749, pursuer_7/loss=1.795, rew=367.95]                           


Epoch #404: test_reward: 457.208083 ± 201.878377, best_reward: 504.651417 ± 150.050255 in #381


Epoch #405: 20001it [01:01, 324.54it/s, env_step=8100000, len=2472, n/ep=0, n/st=400, pursuer_0/loss=1.449, pursuer_1/loss=1.797, pursuer_2/loss=1.633, pursuer_3/loss=2.010, pursuer_4/loss=1.658, pursuer_5/loss=1.850, pursuer_6/loss=1.832, pursuer_7/loss=1.885, rew=561.85]                           


Epoch #405: test_reward: 454.161667 ± 205.946276, best_reward: 504.651417 ± 150.050255 in #381


Epoch #406: 20001it [00:59, 336.37it/s, env_step=8120000, len=792, n/ep=0, n/st=400, pursuer_0/loss=1.575, pursuer_1/loss=1.756, pursuer_2/loss=1.739, pursuer_3/loss=1.913, pursuer_4/loss=1.772, pursuer_5/loss=1.851, pursuer_6/loss=1.741, pursuer_7/loss=1.857, rew=534.68]                            


Epoch #406: test_reward: 481.718833 ± 221.998047, best_reward: 504.651417 ± 150.050255 in #381


Epoch #407: 20001it [01:01, 324.21it/s, env_step=8140000, len=1704, n/ep=0, n/st=400, pursuer_0/loss=1.645, pursuer_1/loss=1.763, pursuer_2/loss=1.643, pursuer_3/loss=2.018, pursuer_4/loss=1.670, pursuer_5/loss=1.758, pursuer_6/loss=1.727, pursuer_7/loss=1.943, rew=518.68]                           


Epoch #407: test_reward: 437.114792 ± 196.712759, best_reward: 504.651417 ± 150.050255 in #381


Epoch #408: 20001it [01:02, 317.93it/s, env_step=8160000, len=3888, n/ep=0, n/st=400, pursuer_0/loss=1.513, pursuer_1/loss=1.622, pursuer_2/loss=1.709, pursuer_3/loss=1.845, pursuer_4/loss=1.644, pursuer_5/loss=1.849, pursuer_6/loss=1.651, pursuer_7/loss=1.623, rew=205.99]                           


Epoch #408: test_reward: 437.837000 ± 160.057323, best_reward: 504.651417 ± 150.050255 in #381


Epoch #409: 20001it [01:01, 326.00it/s, env_step=8180000, len=712, n/ep=0, n/st=400, pursuer_0/loss=1.638, pursuer_1/loss=1.717, pursuer_2/loss=1.592, pursuer_3/loss=1.857, pursuer_4/loss=1.629, pursuer_5/loss=1.962, pursuer_6/loss=1.704, pursuer_7/loss=1.778, rew=612.62]                            


Epoch #409: test_reward: 452.278208 ± 190.951975, best_reward: 504.651417 ± 150.050255 in #381


Epoch #410: 20001it [01:01, 323.42it/s, env_step=8200000, len=1056, n/ep=0, n/st=400, pursuer_0/loss=1.499, pursuer_1/loss=1.500, pursuer_2/loss=1.685, pursuer_3/loss=1.715, pursuer_4/loss=1.646, pursuer_5/loss=1.735, pursuer_6/loss=1.509, pursuer_7/loss=1.636, rew=759.08]                           


Epoch #410: test_reward: 470.284500 ± 153.537693, best_reward: 504.651417 ± 150.050255 in #381


Epoch #411: 20001it [01:00, 327.95it/s, env_step=8220000, len=3216, n/ep=0, n/st=400, pursuer_0/loss=1.605, pursuer_1/loss=1.603, pursuer_2/loss=1.590, pursuer_3/loss=1.781, pursuer_4/loss=1.706, pursuer_5/loss=1.865, pursuer_6/loss=1.803, pursuer_7/loss=1.889, rew=358.08]                           


Epoch #411: test_reward: 385.967917 ± 171.468354, best_reward: 504.651417 ± 150.050255 in #381


Epoch #412: 20001it [01:00, 331.54it/s, env_step=8240000, len=2696, n/ep=0, n/st=400, pursuer_0/loss=1.468, pursuer_1/loss=1.591, pursuer_2/loss=1.786, pursuer_3/loss=1.679, pursuer_4/loss=1.636, pursuer_5/loss=1.762, pursuer_6/loss=1.903, pursuer_7/loss=1.902, rew=510.48]                           


Epoch #412: test_reward: 404.746625 ± 225.146773, best_reward: 504.651417 ± 150.050255 in #381


Epoch #413: 20001it [01:03, 313.09it/s, env_step=8260000, len=1156, n/ep=2, n/st=400, pursuer_0/loss=1.494, pursuer_1/loss=1.663, pursuer_2/loss=1.646, pursuer_3/loss=1.912, pursuer_4/loss=1.576, pursuer_5/loss=1.950, pursuer_6/loss=1.732, pursuer_7/loss=1.819, rew=606.33]                           


Epoch #413: test_reward: 378.122208 ± 180.878870, best_reward: 504.651417 ± 150.050255 in #381


Epoch #414: 20001it [01:02, 318.05it/s, env_step=8280000, len=2904, n/ep=0, n/st=400, pursuer_0/loss=1.465, pursuer_1/loss=1.548, pursuer_2/loss=1.614, pursuer_3/loss=1.715, pursuer_4/loss=1.681, pursuer_5/loss=1.727, pursuer_6/loss=1.808, pursuer_7/loss=1.768, rew=259.50]                           


Best Saved Rew 3473
Epoch #414: test_reward: 543.328792 ± 163.931681, best_reward: 543.328792 ± 163.931681 in #414


Epoch #415: 20001it [01:00, 332.21it/s, env_step=8300000, len=1640, n/ep=0, n/st=400, pursuer_0/loss=1.523, pursuer_1/loss=1.567, pursuer_2/loss=1.772, pursuer_3/loss=1.782, pursuer_4/loss=1.658, pursuer_5/loss=1.815, pursuer_6/loss=1.766, pursuer_7/loss=2.012, rew=425.41]                           


Epoch #415: test_reward: 520.073958 ± 161.329518, best_reward: 543.328792 ± 163.931681 in #414


Epoch #416: 20001it [01:03, 315.77it/s, env_step=8320000, len=1632, n/ep=0, n/st=400, pursuer_0/loss=1.474, pursuer_1/loss=1.561, pursuer_2/loss=1.728, pursuer_3/loss=1.785, pursuer_4/loss=1.657, pursuer_5/loss=1.780, pursuer_6/loss=1.807, pursuer_7/loss=1.839, rew=615.73]                           


Steps Policy Saved  3500
Epoch #416: test_reward: 470.471625 ± 203.023715, best_reward: 543.328792 ± 163.931681 in #414


Epoch #417: 20001it [01:02, 319.11it/s, env_step=8340000, len=2008, n/ep=0, n/st=400, pursuer_0/loss=1.742, pursuer_1/loss=1.717, pursuer_2/loss=1.918, pursuer_3/loss=2.000, pursuer_4/loss=1.691, pursuer_5/loss=1.773, pursuer_6/loss=1.928, pursuer_7/loss=2.145, rew=514.06]                           


Epoch #417: test_reward: 497.940292 ± 176.812666, best_reward: 543.328792 ± 163.931681 in #414


Epoch #418: 20001it [01:00, 328.39it/s, env_step=8360000, len=1680, n/ep=1, n/st=400, pursuer_0/loss=1.407, pursuer_1/loss=1.626, pursuer_2/loss=1.693, pursuer_3/loss=1.759, pursuer_4/loss=1.694, pursuer_5/loss=1.765, pursuer_6/loss=1.910, pursuer_7/loss=1.732, rew=424.85]                           


Epoch #418: test_reward: 488.081625 ± 191.108817, best_reward: 543.328792 ± 163.931681 in #414


Epoch #419: 20001it [01:01, 326.07it/s, env_step=8380000, len=1896, n/ep=0, n/st=400, pursuer_0/loss=1.685, pursuer_1/loss=1.834, pursuer_2/loss=1.913, pursuer_3/loss=1.872, pursuer_4/loss=1.737, pursuer_5/loss=1.764, pursuer_6/loss=1.789, pursuer_7/loss=1.863, rew=600.47]                           


Epoch #419: test_reward: 474.476417 ± 179.512906, best_reward: 543.328792 ± 163.931681 in #414


Epoch #420: 20001it [01:00, 331.56it/s, env_step=8400000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.535, pursuer_1/loss=1.628, pursuer_2/loss=1.722, pursuer_3/loss=1.673, pursuer_4/loss=1.682, pursuer_5/loss=1.770, pursuer_6/loss=1.684, pursuer_7/loss=1.751, rew=234.09]                           


Epoch #420: test_reward: 431.748958 ± 201.473426, best_reward: 543.328792 ± 163.931681 in #414


Epoch #421: 20001it [01:04, 311.85it/s, env_step=8420000, len=1488, n/ep=1, n/st=400, pursuer_0/loss=1.599, pursuer_1/loss=1.730, pursuer_2/loss=1.977, pursuer_3/loss=1.698, pursuer_4/loss=1.671, pursuer_5/loss=1.654, pursuer_6/loss=1.802, pursuer_7/loss=1.865, rew=622.25]                           


Epoch #421: test_reward: 302.598958 ± 196.627569, best_reward: 543.328792 ± 163.931681 in #414


Epoch #422: 20001it [01:03, 314.51it/s, env_step=8440000, len=1456, n/ep=0, n/st=400, pursuer_0/loss=1.707, pursuer_1/loss=1.817, pursuer_2/loss=1.816, pursuer_3/loss=1.760, pursuer_4/loss=1.577, pursuer_5/loss=1.740, pursuer_6/loss=1.988, pursuer_7/loss=1.776, rew=483.78]                           


Epoch #422: test_reward: 270.227542 ± 161.412872, best_reward: 543.328792 ± 163.931681 in #414


Epoch #423: 20001it [01:00, 332.29it/s, env_step=8460000, len=1600, n/ep=0, n/st=400, pursuer_0/loss=1.541, pursuer_1/loss=1.579, pursuer_2/loss=1.867, pursuer_3/loss=1.757, pursuer_4/loss=1.575, pursuer_5/loss=1.836, pursuer_6/loss=1.784, pursuer_7/loss=1.835, rew=477.91]                           


Epoch #423: test_reward: 485.441708 ± 195.582792, best_reward: 543.328792 ± 163.931681 in #414


Epoch #424: 20001it [01:01, 323.20it/s, env_step=8480000, len=736, n/ep=0, n/st=400, pursuer_0/loss=1.585, pursuer_1/loss=1.647, pursuer_2/loss=1.735, pursuer_3/loss=1.696, pursuer_4/loss=1.706, pursuer_5/loss=1.728, pursuer_6/loss=1.731, pursuer_7/loss=1.871, rew=724.97]                            


Epoch #424: test_reward: 446.021417 ± 180.561610, best_reward: 543.328792 ± 163.931681 in #414


Epoch #425: 20001it [01:01, 323.89it/s, env_step=8500000, len=1592, n/ep=1, n/st=400, pursuer_0/loss=1.860, pursuer_1/loss=1.956, pursuer_2/loss=1.787, pursuer_3/loss=1.731, pursuer_4/loss=1.864, pursuer_5/loss=1.879, pursuer_6/loss=1.800, pursuer_7/loss=1.886, rew=484.65]                           


Steps Policy Saved  3610
Epoch #425: test_reward: 407.440042 ± 186.437123, best_reward: 543.328792 ± 163.931681 in #414


Epoch #426: 20001it [01:05, 306.67it/s, env_step=8520000, len=1216, n/ep=0, n/st=400, pursuer_0/loss=1.781, pursuer_1/loss=1.787, pursuer_2/loss=1.957, pursuer_3/loss=1.824, pursuer_4/loss=1.709, pursuer_5/loss=1.774, pursuer_6/loss=1.913, pursuer_7/loss=2.007, rew=517.85]                           


Epoch #426: test_reward: 434.314167 ± 212.637462, best_reward: 543.328792 ± 163.931681 in #414


Epoch #427: 20001it [01:02, 318.44it/s, env_step=8540000, len=1280, n/ep=1, n/st=400, pursuer_0/loss=1.629, pursuer_1/loss=1.761, pursuer_2/loss=1.754, pursuer_3/loss=1.821, pursuer_4/loss=1.700, pursuer_5/loss=1.739, pursuer_6/loss=1.992, pursuer_7/loss=1.799, rew=636.87]                           


Epoch #427: test_reward: 502.162333 ± 166.498979, best_reward: 543.328792 ± 163.931681 in #414


Epoch #428: 20001it [01:01, 323.86it/s, env_step=8560000, len=936, n/ep=0, n/st=400, pursuer_0/loss=1.556, pursuer_1/loss=1.842, pursuer_2/loss=1.792, pursuer_3/loss=1.713, pursuer_4/loss=1.827, pursuer_5/loss=1.827, pursuer_6/loss=1.788, pursuer_7/loss=2.015, rew=559.60]                            


Epoch #428: test_reward: 422.298667 ± 154.992200, best_reward: 543.328792 ± 163.931681 in #414


Epoch #429: 20001it [01:01, 325.20it/s, env_step=8580000, len=3992, n/ep=1, n/st=400, pursuer_0/loss=1.760, pursuer_1/loss=1.819, pursuer_2/loss=1.830, pursuer_3/loss=1.786, pursuer_4/loss=1.612, pursuer_5/loss=1.761, pursuer_6/loss=2.012, pursuer_7/loss=1.839, rew=410.36]                           


Epoch #429: test_reward: 504.833542 ± 166.363031, best_reward: 543.328792 ± 163.931681 in #414


Epoch #430: 20001it [01:00, 332.30it/s, env_step=8600000, len=2616, n/ep=1, n/st=400, pursuer_0/loss=1.686, pursuer_1/loss=1.601, pursuer_2/loss=1.714, pursuer_3/loss=1.705, pursuer_4/loss=1.656, pursuer_5/loss=1.822, pursuer_6/loss=1.926, pursuer_7/loss=1.798, rew=537.76]                           


Epoch #430: test_reward: 504.059250 ± 197.800491, best_reward: 543.328792 ± 163.931681 in #414


Epoch #431: 20001it [01:01, 327.81it/s, env_step=8620000, len=952, n/ep=0, n/st=400, pursuer_0/loss=1.579, pursuer_1/loss=1.861, pursuer_2/loss=1.812, pursuer_3/loss=1.812, pursuer_4/loss=1.846, pursuer_5/loss=1.759, pursuer_6/loss=2.068, pursuer_7/loss=1.825, rew=569.25]                            


Epoch #431: test_reward: 422.035667 ± 168.486453, best_reward: 543.328792 ± 163.931681 in #414


Epoch #432: 20001it [01:02, 318.27it/s, env_step=8640000, len=1416, n/ep=0, n/st=400, pursuer_0/loss=1.606, pursuer_1/loss=1.826, pursuer_2/loss=1.846, pursuer_3/loss=1.624, pursuer_4/loss=1.773, pursuer_5/loss=1.861, pursuer_6/loss=1.862, pursuer_7/loss=1.703, rew=406.48]                           


Epoch #432: test_reward: 449.114083 ± 235.314275, best_reward: 543.328792 ± 163.931681 in #414


Epoch #433: 20001it [01:00, 330.28it/s, env_step=8660000, len=1000, n/ep=1, n/st=400, pursuer_0/loss=1.448, pursuer_1/loss=1.704, pursuer_2/loss=1.658, pursuer_3/loss=1.691, pursuer_4/loss=1.617, pursuer_5/loss=1.741, pursuer_6/loss=1.613, pursuer_7/loss=1.720, rew=659.06]                           


Epoch #433: test_reward: 444.393250 ± 186.328277, best_reward: 543.328792 ± 163.931681 in #414


Epoch #434: 20001it [01:01, 325.47it/s, env_step=8680000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.392, pursuer_1/loss=1.776, pursuer_2/loss=1.767, pursuer_3/loss=1.656, pursuer_4/loss=1.546, pursuer_5/loss=1.746, pursuer_6/loss=1.705, pursuer_7/loss=1.691, rew=364.10]                           


Steps Policy Saved  3710
Epoch #434: test_reward: 471.032625 ± 156.253681, best_reward: 543.328792 ± 163.931681 in #414


Epoch #435: 20001it [01:01, 325.88it/s, env_step=8700000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.607, pursuer_1/loss=1.603, pursuer_2/loss=1.691, pursuer_3/loss=1.695, pursuer_4/loss=1.591, pursuer_5/loss=1.650, pursuer_6/loss=1.829, pursuer_7/loss=1.944, rew=184.49]                           


Steps Policy Saved  3720
Epoch #435: test_reward: 437.738958 ± 216.571178, best_reward: 543.328792 ± 163.931681 in #414


Epoch #436: 20001it [01:03, 315.02it/s, env_step=8720000, len=1656, n/ep=0, n/st=400, pursuer_0/loss=1.468, pursuer_1/loss=1.763, pursuer_2/loss=1.807, pursuer_3/loss=1.566, pursuer_4/loss=1.632, pursuer_5/loss=1.596, pursuer_6/loss=1.749, pursuer_7/loss=1.640, rew=643.52]                           


Steps Policy Saved  3730
Epoch #436: test_reward: 427.753417 ± 218.246284, best_reward: 543.328792 ± 163.931681 in #414


Epoch #437: 20001it [01:04, 309.15it/s, env_step=8740000, len=1044, n/ep=0, n/st=400, pursuer_0/loss=1.572, pursuer_1/loss=1.680, pursuer_2/loss=1.558, pursuer_3/loss=1.602, pursuer_4/loss=1.563, pursuer_5/loss=1.710, pursuer_6/loss=1.747, pursuer_7/loss=1.738, rew=646.86]                           


Steps Policy Saved  3740
Epoch #437: test_reward: 455.009833 ± 224.180248, best_reward: 543.328792 ± 163.931681 in #414


Epoch #438: 20001it [01:04, 311.03it/s, env_step=8760000, len=992, n/ep=0, n/st=400, pursuer_0/loss=1.730, pursuer_1/loss=1.692, pursuer_2/loss=1.696, pursuer_3/loss=1.592, pursuer_4/loss=1.658, pursuer_5/loss=1.612, pursuer_6/loss=1.685, pursuer_7/loss=1.857, rew=610.53]                            


Epoch #438: test_reward: 422.078542 ± 205.509222, best_reward: 543.328792 ± 163.931681 in #414


Epoch #439: 20001it [01:02, 318.88it/s, env_step=8780000, len=2472, n/ep=0, n/st=400, pursuer_0/loss=1.598, pursuer_1/loss=1.706, pursuer_2/loss=1.624, pursuer_3/loss=1.595, pursuer_4/loss=1.603, pursuer_5/loss=1.636, pursuer_6/loss=1.567, pursuer_7/loss=1.799, rew=368.99]                           


Epoch #439: test_reward: 501.279542 ± 179.611274, best_reward: 543.328792 ± 163.931681 in #414


Epoch #440: 20001it [01:04, 311.17it/s, env_step=8800000, len=1200, n/ep=0, n/st=400, pursuer_0/loss=1.416, pursuer_1/loss=1.626, pursuer_2/loss=1.680, pursuer_3/loss=1.613, pursuer_4/loss=1.615, pursuer_5/loss=1.566, pursuer_6/loss=1.738, pursuer_7/loss=1.581, rew=587.63]                           


Epoch #440: test_reward: 379.315958 ± 168.208551, best_reward: 543.328792 ± 163.931681 in #414


Epoch #441: 20001it [01:04, 309.95it/s, env_step=8820000, len=2400, n/ep=0, n/st=400, pursuer_0/loss=1.455, pursuer_1/loss=1.549, pursuer_2/loss=1.656, pursuer_3/loss=1.555, pursuer_4/loss=1.595, pursuer_5/loss=1.596, pursuer_6/loss=1.703, pursuer_7/loss=1.687, rew=640.07]                           


Epoch #441: test_reward: 435.552250 ± 191.142405, best_reward: 543.328792 ± 163.931681 in #414


Epoch #442: 20001it [01:02, 320.58it/s, env_step=8840000, len=1128, n/ep=0, n/st=400, pursuer_0/loss=1.399, pursuer_1/loss=1.605, pursuer_2/loss=1.630, pursuer_3/loss=1.588, pursuer_4/loss=1.670, pursuer_5/loss=1.594, pursuer_6/loss=1.473, pursuer_7/loss=1.513, rew=405.28]                           


Epoch #442: test_reward: 436.333708 ± 220.478246, best_reward: 543.328792 ± 163.931681 in #414


Epoch #443: 20001it [01:02, 320.40it/s, env_step=8860000, len=3456, n/ep=0, n/st=400, pursuer_0/loss=1.258, pursuer_1/loss=1.364, pursuer_2/loss=1.436, pursuer_3/loss=1.523, pursuer_4/loss=1.395, pursuer_5/loss=1.455, pursuer_6/loss=1.536, pursuer_7/loss=1.567, rew=483.99]                           


Epoch #443: test_reward: 540.156792 ± 201.871343, best_reward: 543.328792 ± 163.931681 in #414


Epoch #444: 20001it [01:02, 322.49it/s, env_step=8880000, len=1360, n/ep=1, n/st=400, pursuer_0/loss=1.419, pursuer_1/loss=1.559, pursuer_2/loss=1.427, pursuer_3/loss=1.538, pursuer_4/loss=1.348, pursuer_5/loss=1.462, pursuer_6/loss=1.398, pursuer_7/loss=1.644, rew=560.32]                           


Epoch #444: test_reward: 456.071958 ± 145.951726, best_reward: 543.328792 ± 163.931681 in #414


Epoch #445: 20001it [01:03, 313.33it/s, env_step=8900000, len=2384, n/ep=0, n/st=400, pursuer_0/loss=1.563, pursuer_1/loss=1.458, pursuer_2/loss=1.524, pursuer_3/loss=1.523, pursuer_4/loss=1.636, pursuer_5/loss=1.656, pursuer_6/loss=1.684, pursuer_7/loss=1.646, rew=415.15]                           


Epoch #445: test_reward: 386.212250 ± 171.995249, best_reward: 543.328792 ± 163.931681 in #414


Epoch #446: 20001it [01:01, 326.27it/s, env_step=8920000, len=3288, n/ep=0, n/st=400, pursuer_0/loss=1.365, pursuer_1/loss=1.319, pursuer_2/loss=1.525, pursuer_3/loss=1.548, pursuer_4/loss=1.218, pursuer_5/loss=1.449, pursuer_6/loss=1.440, pursuer_7/loss=1.619, rew=239.82]                           


Epoch #446: test_reward: 445.763875 ± 180.489754, best_reward: 543.328792 ± 163.931681 in #414


Epoch #447: 20001it [01:04, 310.21it/s, env_step=8940000, len=720, n/ep=0, n/st=400, pursuer_0/loss=1.455, pursuer_1/loss=1.377, pursuer_2/loss=1.528, pursuer_3/loss=1.420, pursuer_4/loss=1.578, pursuer_5/loss=1.418, pursuer_6/loss=1.511, pursuer_7/loss=1.515, rew=571.11]                            


Epoch #447: test_reward: 449.006083 ± 184.221396, best_reward: 543.328792 ± 163.931681 in #414


Epoch #448: 20001it [01:01, 325.45it/s, env_step=8960000, len=856, n/ep=0, n/st=400, pursuer_0/loss=1.430, pursuer_1/loss=1.357, pursuer_2/loss=1.642, pursuer_3/loss=1.501, pursuer_4/loss=1.440, pursuer_5/loss=1.637, pursuer_6/loss=1.468, pursuer_7/loss=1.625, rew=743.09]                            


Epoch #448: test_reward: 540.605208 ± 177.078123, best_reward: 543.328792 ± 163.931681 in #414


Epoch #449: 20001it [01:02, 320.62it/s, env_step=8980000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.487, pursuer_1/loss=1.420, pursuer_2/loss=1.754, pursuer_3/loss=1.799, pursuer_4/loss=1.581, pursuer_5/loss=1.535, pursuer_6/loss=1.609, pursuer_7/loss=1.651, rew=149.02]                           


Epoch #449: test_reward: 480.124167 ± 183.754420, best_reward: 543.328792 ± 163.931681 in #414


Epoch #450: 20001it [01:05, 303.80it/s, env_step=9000000, len=3232, n/ep=0, n/st=400, pursuer_0/loss=1.416, pursuer_1/loss=1.407, pursuer_2/loss=1.517, pursuer_3/loss=1.593, pursuer_4/loss=1.454, pursuer_5/loss=1.485, pursuer_6/loss=1.576, pursuer_7/loss=1.612, rew=426.76]                           


Epoch #450: test_reward: 433.591625 ± 165.747774, best_reward: 543.328792 ± 163.931681 in #414


Epoch #451: 20001it [01:04, 309.34it/s, env_step=9020000, len=1136, n/ep=0, n/st=400, pursuer_0/loss=1.429, pursuer_1/loss=1.478, pursuer_2/loss=1.602, pursuer_3/loss=1.659, pursuer_4/loss=1.537, pursuer_5/loss=1.465, pursuer_6/loss=1.671, pursuer_7/loss=1.641, rew=796.29]                           


Epoch #451: test_reward: 442.805500 ± 224.230997, best_reward: 543.328792 ± 163.931681 in #414


Epoch #452: 20001it [01:06, 302.34it/s, env_step=9040000, len=3392, n/ep=0, n/st=400, pursuer_0/loss=1.384, pursuer_1/loss=1.264, pursuer_2/loss=1.643, pursuer_3/loss=1.692, pursuer_4/loss=1.542, pursuer_5/loss=1.518, pursuer_6/loss=1.451, pursuer_7/loss=1.653, rew=340.35]                           


Epoch #452: test_reward: 438.646208 ± 184.050345, best_reward: 543.328792 ± 163.931681 in #414


Epoch #453: 20001it [01:02, 322.07it/s, env_step=9060000, len=2848, n/ep=0, n/st=400, pursuer_0/loss=1.195, pursuer_1/loss=1.488, pursuer_2/loss=1.604, pursuer_3/loss=1.586, pursuer_4/loss=1.427, pursuer_5/loss=1.593, pursuer_6/loss=1.525, pursuer_7/loss=1.659, rew=394.44]                           


Epoch #453: test_reward: 453.764708 ± 213.565805, best_reward: 543.328792 ± 163.931681 in #414


Epoch #454: 20001it [01:02, 318.89it/s, env_step=9080000, len=1896, n/ep=0, n/st=400, pursuer_0/loss=1.536, pursuer_1/loss=1.503, pursuer_2/loss=1.663, pursuer_3/loss=1.586, pursuer_4/loss=1.595, pursuer_5/loss=1.596, pursuer_6/loss=1.692, pursuer_7/loss=1.782, rew=364.12]                           


Epoch #454: test_reward: 524.770500 ± 159.292567, best_reward: 543.328792 ± 163.931681 in #414


Epoch #455: 20001it [01:05, 303.59it/s, env_step=9100000, len=920, n/ep=0, n/st=400, pursuer_0/loss=1.539, pursuer_1/loss=1.553, pursuer_2/loss=1.656, pursuer_3/loss=1.677, pursuer_4/loss=1.494, pursuer_5/loss=1.746, pursuer_6/loss=1.765, pursuer_7/loss=1.735, rew=492.03]                            


Steps Policy Saved  3930
Epoch #455: test_reward: 478.997708 ± 183.017610, best_reward: 543.328792 ± 163.931681 in #414


Epoch #456: 20001it [01:05, 303.77it/s, env_step=9120000, len=808, n/ep=1, n/st=400, pursuer_0/loss=1.304, pursuer_1/loss=1.420, pursuer_2/loss=1.587, pursuer_3/loss=1.628, pursuer_4/loss=1.579, pursuer_5/loss=1.660, pursuer_6/loss=1.512, pursuer_7/loss=1.761, rew=536.73]                            


Epoch #456: test_reward: 441.955208 ± 187.778982, best_reward: 543.328792 ± 163.931681 in #414


Epoch #457: 20001it [01:07, 297.14it/s, env_step=9140000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.444, pursuer_1/loss=1.541, pursuer_2/loss=1.715, pursuer_3/loss=1.567, pursuer_4/loss=1.664, pursuer_5/loss=1.670, pursuer_6/loss=1.646, pursuer_7/loss=1.657, rew=134.98]                           


Epoch #457: test_reward: 478.621542 ± 169.577980, best_reward: 543.328792 ± 163.931681 in #414


Epoch #458: 20001it [01:06, 302.85it/s, env_step=9160000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.578, pursuer_1/loss=1.534, pursuer_2/loss=1.668, pursuer_3/loss=1.551, pursuer_4/loss=1.486, pursuer_5/loss=1.610, pursuer_6/loss=1.786, pursuer_7/loss=1.816, rew=104.04]                           


Epoch #458: test_reward: 457.051083 ± 197.218497, best_reward: 543.328792 ± 163.931681 in #414


Epoch #459: 20001it [01:07, 294.71it/s, env_step=9180000, len=2516, n/ep=0, n/st=400, pursuer_0/loss=1.531, pursuer_1/loss=1.522, pursuer_2/loss=1.610, pursuer_3/loss=1.720, pursuer_4/loss=1.478, pursuer_5/loss=1.556, pursuer_6/loss=1.722, pursuer_7/loss=1.840, rew=509.74]                           


Epoch #459: test_reward: 479.958042 ± 160.035750, best_reward: 543.328792 ± 163.931681 in #414


Epoch #460: 20001it [01:09, 285.77it/s, env_step=9200000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.618, pursuer_1/loss=1.534, pursuer_2/loss=1.740, pursuer_3/loss=1.828, pursuer_4/loss=1.768, pursuer_5/loss=1.711, pursuer_6/loss=1.741, pursuer_7/loss=1.853, rew=168.07]                           


Epoch #460: test_reward: 493.456875 ± 172.333265, best_reward: 543.328792 ± 163.931681 in #414


Epoch #461: 20001it [01:10, 285.22it/s, env_step=9220000, len=3112, n/ep=0, n/st=400, pursuer_0/loss=1.621, pursuer_1/loss=1.545, pursuer_2/loss=1.753, pursuer_3/loss=1.645, pursuer_4/loss=1.755, pursuer_5/loss=1.826, pursuer_6/loss=1.799, pursuer_7/loss=1.804, rew=367.28]                           


Best Saved Rew 3999
Epoch #461: test_reward: 558.807958 ± 159.124886, best_reward: 558.807958 ± 159.124886 in #461


Epoch #462: 20001it [01:03, 313.73it/s, env_step=9240000, len=1312, n/ep=0, n/st=400, pursuer_0/loss=1.473, pursuer_1/loss=1.602, pursuer_2/loss=1.672, pursuer_3/loss=1.634, pursuer_4/loss=1.538, pursuer_5/loss=1.805, pursuer_6/loss=1.770, pursuer_7/loss=1.797, rew=539.26]                           


Epoch #462: test_reward: 532.227917 ± 155.064698, best_reward: 558.807958 ± 159.124886 in #461


Epoch #463: 20001it [01:04, 311.03it/s, env_step=9260000, len=1172, n/ep=0, n/st=400, pursuer_0/loss=1.627, pursuer_1/loss=1.629, pursuer_2/loss=1.766, pursuer_3/loss=1.677, pursuer_4/loss=1.752, pursuer_5/loss=1.973, pursuer_6/loss=1.959, pursuer_7/loss=1.937, rew=464.35]                           


Epoch #463: test_reward: 449.741750 ± 144.429235, best_reward: 558.807958 ± 159.124886 in #461


Epoch #464: 20001it [01:06, 302.05it/s, env_step=9280000, len=1064, n/ep=0, n/st=400, pursuer_0/loss=1.579, pursuer_1/loss=1.627, pursuer_2/loss=1.740, pursuer_3/loss=1.667, pursuer_4/loss=1.665, pursuer_5/loss=1.633, pursuer_6/loss=1.668, pursuer_7/loss=1.790, rew=727.07]                           


Epoch #464: test_reward: 436.387500 ± 225.457664, best_reward: 558.807958 ± 159.124886 in #461


Epoch #465: 20001it [01:05, 303.17it/s, env_step=9300000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.615, pursuer_1/loss=1.730, pursuer_2/loss=1.897, pursuer_3/loss=1.728, pursuer_4/loss=1.756, pursuer_5/loss=1.776, pursuer_6/loss=1.682, pursuer_7/loss=1.869, rew=354.01]                           


Epoch #465: test_reward: 459.641292 ± 154.471461, best_reward: 558.807958 ± 159.124886 in #461


Epoch #466: 20001it [01:04, 311.05it/s, env_step=9320000, len=1088, n/ep=0, n/st=400, pursuer_0/loss=1.624, pursuer_1/loss=1.694, pursuer_2/loss=1.628, pursuer_3/loss=1.701, pursuer_4/loss=1.655, pursuer_5/loss=1.921, pursuer_6/loss=1.831, pursuer_7/loss=1.998, rew=730.27]                           


Epoch #466: test_reward: 510.011500 ± 208.292273, best_reward: 558.807958 ± 159.124886 in #461


Epoch #467: 20001it [01:03, 316.30it/s, env_step=9340000, len=1072, n/ep=0, n/st=400, pursuer_0/loss=1.665, pursuer_1/loss=1.662, pursuer_2/loss=1.806, pursuer_3/loss=1.597, pursuer_4/loss=1.796, pursuer_5/loss=1.844, pursuer_6/loss=1.863, pursuer_7/loss=1.970, rew=567.59]                           


Epoch #467: test_reward: 532.712667 ± 183.609500, best_reward: 558.807958 ± 159.124886 in #461


Epoch #468: 20001it [01:04, 310.36it/s, env_step=9360000, len=768, n/ep=0, n/st=400, pursuer_0/loss=1.689, pursuer_1/loss=1.638, pursuer_2/loss=1.815, pursuer_3/loss=1.759, pursuer_4/loss=1.668, pursuer_5/loss=1.890, pursuer_6/loss=1.994, pursuer_7/loss=1.905, rew=672.38]                            


Epoch #468: test_reward: 470.340792 ± 192.971767, best_reward: 558.807958 ± 159.124886 in #461


Epoch #469: 20001it [01:05, 307.53it/s, env_step=9380000, len=1744, n/ep=0, n/st=400, pursuer_0/loss=1.570, pursuer_1/loss=1.657, pursuer_2/loss=1.937, pursuer_3/loss=1.809, pursuer_4/loss=1.775, pursuer_5/loss=1.707, pursuer_6/loss=1.773, pursuer_7/loss=1.707, rew=635.16]                           


Epoch #469: test_reward: 489.712958 ± 196.453161, best_reward: 558.807958 ± 159.124886 in #461


Epoch #470: 20001it [01:00, 328.18it/s, env_step=9400000, len=1160, n/ep=0, n/st=400, pursuer_0/loss=1.731, pursuer_1/loss=1.598, pursuer_2/loss=1.867, pursuer_3/loss=1.741, pursuer_4/loss=1.861, pursuer_5/loss=1.869, pursuer_6/loss=1.886, pursuer_7/loss=1.930, rew=423.05]                           


Epoch #470: test_reward: 389.315542 ± 212.441294, best_reward: 558.807958 ± 159.124886 in #461


Epoch #471: 20001it [01:08, 291.26it/s, env_step=9420000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.680, pursuer_1/loss=1.746, pursuer_2/loss=1.799, pursuer_3/loss=1.706, pursuer_4/loss=1.667, pursuer_5/loss=1.717, pursuer_6/loss=1.970, pursuer_7/loss=1.917, rew=228.25]                           


Epoch #471: test_reward: 474.294708 ± 148.631737, best_reward: 558.807958 ± 159.124886 in #461


Epoch #472: 20001it [01:17, 257.67it/s, env_step=9440000, len=3064, n/ep=0, n/st=400, pursuer_0/loss=1.627, pursuer_1/loss=1.783, pursuer_2/loss=1.629, pursuer_3/loss=1.620, pursuer_4/loss=1.741, pursuer_5/loss=1.613, pursuer_6/loss=1.626, pursuer_7/loss=1.760, rew=478.27]                           


Epoch #472: test_reward: 453.704417 ± 149.558169, best_reward: 558.807958 ± 159.124886 in #461


Epoch #473: 20001it [01:10, 282.08it/s, env_step=9460000, len=1152, n/ep=1, n/st=400, pursuer_0/loss=1.644, pursuer_1/loss=1.629, pursuer_2/loss=1.465, pursuer_3/loss=1.527, pursuer_4/loss=1.799, pursuer_5/loss=1.761, pursuer_6/loss=1.609, pursuer_7/loss=1.750, rew=512.43]                           


Steps Policy Saved  4130
Epoch #473: test_reward: 419.230542 ± 196.132342, best_reward: 558.807958 ± 159.124886 in #461


Epoch #474: 20001it [01:10, 281.75it/s, env_step=9480000, len=920, n/ep=0, n/st=400, pursuer_0/loss=1.654, pursuer_1/loss=1.519, pursuer_2/loss=1.515, pursuer_3/loss=1.600, pursuer_4/loss=1.606, pursuer_5/loss=1.560, pursuer_6/loss=1.664, pursuer_7/loss=1.707, rew=531.31]                            


Steps Policy Saved  4140
Epoch #474: test_reward: 450.242750 ± 195.964058, best_reward: 558.807958 ± 159.124886 in #461


Epoch #475: 20001it [01:03, 315.65it/s, env_step=9500000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.498, pursuer_1/loss=1.721, pursuer_2/loss=1.732, pursuer_3/loss=1.533, pursuer_4/loss=1.671, pursuer_5/loss=1.575, pursuer_6/loss=1.635, pursuer_7/loss=1.633, rew=153.53]                           


Epoch #475: test_reward: 427.629083 ± 205.730612, best_reward: 558.807958 ± 159.124886 in #461


Epoch #476: 20001it [01:08, 290.65it/s, env_step=9520000, len=2200, n/ep=0, n/st=400, pursuer_0/loss=1.635, pursuer_1/loss=1.652, pursuer_2/loss=1.659, pursuer_3/loss=1.704, pursuer_4/loss=1.549, pursuer_5/loss=1.732, pursuer_6/loss=1.723, pursuer_7/loss=1.777, rew=409.81]                           


Epoch #476: test_reward: 466.639583 ± 207.912933, best_reward: 558.807958 ± 159.124886 in #461


Epoch #477: 20001it [01:04, 308.08it/s, env_step=9540000, len=1672, n/ep=0, n/st=400, pursuer_0/loss=1.495, pursuer_1/loss=1.683, pursuer_2/loss=1.687, pursuer_3/loss=1.619, pursuer_4/loss=1.477, pursuer_5/loss=1.661, pursuer_6/loss=1.721, pursuer_7/loss=1.704, rew=412.79]                           


Epoch #477: test_reward: 437.037333 ± 206.427143, best_reward: 558.807958 ± 159.124886 in #461


Epoch #478: 20001it [01:06, 298.58it/s, env_step=9560000, len=992, n/ep=0, n/st=400, pursuer_0/loss=1.413, pursuer_1/loss=1.580, pursuer_2/loss=1.722, pursuer_3/loss=1.622, pursuer_4/loss=1.589, pursuer_5/loss=1.736, pursuer_6/loss=1.623, pursuer_7/loss=1.736, rew=474.27]                            


Epoch #478: test_reward: 461.108083 ± 227.238235, best_reward: 558.807958 ± 159.124886 in #461


Epoch #479: 20001it [01:02, 320.04it/s, env_step=9580000, len=1808, n/ep=0, n/st=400, pursuer_0/loss=1.612, pursuer_1/loss=1.502, pursuer_2/loss=1.621, pursuer_3/loss=1.575, pursuer_4/loss=1.595, pursuer_5/loss=1.622, pursuer_6/loss=1.639, pursuer_7/loss=1.720, rew=729.03]                           


Epoch #479: test_reward: 503.922042 ± 222.484690, best_reward: 558.807958 ± 159.124886 in #461


Epoch #480: 20001it [01:07, 298.13it/s, env_step=9600000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.583, pursuer_1/loss=1.721, pursuer_2/loss=1.603, pursuer_3/loss=1.713, pursuer_4/loss=1.657, pursuer_5/loss=1.708, pursuer_6/loss=1.754, pursuer_7/loss=1.628, rew=173.55]                           


Best Saved Rew 4206
Epoch #480: test_reward: 570.825583 ± 121.130005, best_reward: 570.825583 ± 121.130005 in #480


Epoch #481: 20001it [01:06, 302.63it/s, env_step=9620000, len=1040, n/ep=0, n/st=400, pursuer_0/loss=1.450, pursuer_1/loss=1.577, pursuer_2/loss=1.682, pursuer_3/loss=1.708, pursuer_4/loss=1.734, pursuer_5/loss=1.699, pursuer_6/loss=1.721, pursuer_7/loss=1.752, rew=845.30]                           


Steps Policy Saved  4220
Epoch #481: test_reward: 517.950583 ± 150.840601, best_reward: 570.825583 ± 121.130005 in #480


Epoch #482: 20001it [01:07, 296.82it/s, env_step=9640000, len=848, n/ep=0, n/st=400, pursuer_0/loss=1.523, pursuer_1/loss=1.601, pursuer_2/loss=1.580, pursuer_3/loss=1.819, pursuer_4/loss=1.490, pursuer_5/loss=1.663, pursuer_6/loss=1.622, pursuer_7/loss=1.822, rew=764.03]                            


Steps Policy Saved  4230
Epoch #482: test_reward: 477.918542 ± 132.756350, best_reward: 570.825583 ± 121.130005 in #480


Epoch #483: 20001it [01:06, 301.26it/s, env_step=9660000, len=784, n/ep=1, n/st=400, pursuer_0/loss=1.411, pursuer_1/loss=1.781, pursuer_2/loss=1.638, pursuer_3/loss=1.714, pursuer_4/loss=1.666, pursuer_5/loss=1.755, pursuer_6/loss=1.538, pursuer_7/loss=1.761, rew=529.95]                            


Epoch #483: test_reward: 474.135292 ± 162.999374, best_reward: 570.825583 ± 121.130005 in #480


Epoch #484: 20001it [01:07, 294.73it/s, env_step=9680000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.530, pursuer_1/loss=1.632, pursuer_2/loss=1.738, pursuer_3/loss=1.773, pursuer_4/loss=1.658, pursuer_5/loss=1.749, pursuer_6/loss=1.778, pursuer_7/loss=1.702, rew=188.94]                           


Epoch #484: test_reward: 399.813542 ± 214.503282, best_reward: 570.825583 ± 121.130005 in #480


Epoch #485: 20001it [01:08, 294.00it/s, env_step=9700000, len=1928, n/ep=0, n/st=400, pursuer_0/loss=1.417, pursuer_1/loss=1.424, pursuer_2/loss=1.596, pursuer_3/loss=1.629, pursuer_4/loss=1.526, pursuer_5/loss=1.518, pursuer_6/loss=1.859, pursuer_7/loss=1.690, rew=340.12]                           


Epoch #485: test_reward: 519.304417 ± 203.162265, best_reward: 570.825583 ± 121.130005 in #480


Epoch #486: 20001it [01:02, 320.59it/s, env_step=9720000, len=3720, n/ep=1, n/st=400, pursuer_0/loss=1.499, pursuer_1/loss=1.480, pursuer_2/loss=1.522, pursuer_3/loss=1.784, pursuer_4/loss=1.545, pursuer_5/loss=1.486, pursuer_6/loss=1.589, pursuer_7/loss=1.704, rew=401.55]                           


Best Saved Rew 4278
Epoch #486: test_reward: 574.202000 ± 154.936748, best_reward: 574.202000 ± 154.936748 in #486


Epoch #487: 20001it [01:04, 312.44it/s, env_step=9740000, len=2304, n/ep=1, n/st=400, pursuer_0/loss=1.544, pursuer_1/loss=1.459, pursuer_2/loss=1.646, pursuer_3/loss=1.582, pursuer_4/loss=1.581, pursuer_5/loss=1.529, pursuer_6/loss=1.826, pursuer_7/loss=1.779, rew=459.06]                           


Epoch #487: test_reward: 479.235542 ± 153.752387, best_reward: 574.202000 ± 154.936748 in #486


Epoch #488: 20001it [01:03, 316.72it/s, env_step=9760000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.509, pursuer_1/loss=1.668, pursuer_2/loss=1.872, pursuer_3/loss=1.883, pursuer_4/loss=1.602, pursuer_5/loss=1.669, pursuer_6/loss=1.811, pursuer_7/loss=1.845, rew=260.99]                           


Epoch #488: test_reward: 529.070375 ± 161.955579, best_reward: 574.202000 ± 154.936748 in #486


Epoch #489: 20001it [01:03, 317.08it/s, env_step=9780000, len=3440, n/ep=0, n/st=400, pursuer_0/loss=1.428, pursuer_1/loss=1.684, pursuer_2/loss=1.620, pursuer_3/loss=1.678, pursuer_4/loss=1.574, pursuer_5/loss=1.616, pursuer_6/loss=1.765, pursuer_7/loss=1.914, rew=201.63]                           


Steps Policy Saved  4310
Epoch #489: test_reward: 510.391750 ± 185.597444, best_reward: 574.202000 ± 154.936748 in #486


Epoch #490: 20001it [01:03, 314.01it/s, env_step=9800000, len=1680, n/ep=0, n/st=400, pursuer_0/loss=1.566, pursuer_1/loss=1.583, pursuer_2/loss=1.776, pursuer_3/loss=1.898, pursuer_4/loss=1.561, pursuer_5/loss=1.666, pursuer_6/loss=1.835, pursuer_7/loss=1.891, rew=445.79]                           


Epoch #490: test_reward: 427.992042 ± 164.300491, best_reward: 574.202000 ± 154.936748 in #486


Epoch #491: 20001it [01:06, 300.15it/s, env_step=9820000, len=1588, n/ep=0, n/st=400, pursuer_0/loss=1.490, pursuer_1/loss=1.621, pursuer_2/loss=1.692, pursuer_3/loss=1.906, pursuer_4/loss=1.622, pursuer_5/loss=1.892, pursuer_6/loss=1.757, pursuer_7/loss=1.762, rew=542.08]                           


Epoch #491: test_reward: 528.064375 ± 188.061616, best_reward: 574.202000 ± 154.936748 in #486


Epoch #492: 20001it [01:04, 311.77it/s, env_step=9840000, len=1016, n/ep=0, n/st=400, pursuer_0/loss=1.375, pursuer_1/loss=1.648, pursuer_2/loss=1.675, pursuer_3/loss=1.822, pursuer_4/loss=1.587, pursuer_5/loss=1.675, pursuer_6/loss=1.835, pursuer_7/loss=1.848, rew=467.78]                           


Epoch #492: test_reward: 473.584667 ± 153.272335, best_reward: 574.202000 ± 154.936748 in #486


Epoch #493: 20001it [01:07, 298.48it/s, env_step=9860000, len=1248, n/ep=0, n/st=400, pursuer_0/loss=1.592, pursuer_1/loss=1.773, pursuer_2/loss=1.955, pursuer_3/loss=1.791, pursuer_4/loss=1.845, pursuer_5/loss=1.803, pursuer_6/loss=1.889, pursuer_7/loss=1.887, rew=729.47]                           


Epoch #493: test_reward: 491.472750 ± 224.352571, best_reward: 574.202000 ± 154.936748 in #486


Epoch #494: 20001it [01:06, 302.74it/s, env_step=9880000, len=1184, n/ep=0, n/st=400, pursuer_0/loss=1.645, pursuer_1/loss=1.836, pursuer_2/loss=2.014, pursuer_3/loss=2.055, pursuer_4/loss=1.777, pursuer_5/loss=1.830, pursuer_6/loss=2.039, pursuer_7/loss=2.018, rew=655.73]                           


Epoch #494: test_reward: 516.218458 ± 210.275030, best_reward: 574.202000 ± 154.936748 in #486


Epoch #495: 20001it [01:04, 309.06it/s, env_step=9900000, len=816, n/ep=1, n/st=400, pursuer_0/loss=1.413, pursuer_1/loss=1.582, pursuer_2/loss=1.829, pursuer_3/loss=1.759, pursuer_4/loss=1.752, pursuer_5/loss=1.697, pursuer_6/loss=1.949, pursuer_7/loss=1.869, rew=486.92]                            


Epoch #495: test_reward: 561.154542 ± 159.938826, best_reward: 574.202000 ± 154.936748 in #486


Epoch #496: 20001it [01:01, 324.13it/s, env_step=9920000, len=1296, n/ep=0, n/st=400, pursuer_0/loss=1.503, pursuer_1/loss=1.627, pursuer_2/loss=1.754, pursuer_3/loss=1.870, pursuer_4/loss=1.735, pursuer_5/loss=1.684, pursuer_6/loss=1.895, pursuer_7/loss=1.766, rew=478.80]                           


Epoch #496: test_reward: 550.792125 ± 137.001368, best_reward: 574.202000 ± 154.936748 in #486


Epoch #497: 20001it [01:04, 309.78it/s, env_step=9940000, len=2664, n/ep=0, n/st=400, pursuer_0/loss=1.494, pursuer_1/loss=1.632, pursuer_2/loss=1.945, pursuer_3/loss=1.921, pursuer_4/loss=1.787, pursuer_5/loss=1.728, pursuer_6/loss=1.895, pursuer_7/loss=1.947, rew=623.03]                           


Steps Policy Saved  4410
Epoch #497: test_reward: 516.818083 ± 183.183381, best_reward: 574.202000 ± 154.936748 in #486


Epoch #498: 20001it [01:03, 316.64it/s, env_step=9960000, len=2664, n/ep=1, n/st=400, pursuer_0/loss=1.608, pursuer_1/loss=1.682, pursuer_2/loss=1.864, pursuer_3/loss=1.751, pursuer_4/loss=1.738, pursuer_5/loss=1.636, pursuer_6/loss=1.724, pursuer_7/loss=1.908, rew=383.34]                           


Best Saved Rew 4424
Epoch #498: test_reward: 588.328083 ± 134.371802, best_reward: 588.328083 ± 134.371802 in #498


Epoch #499: 20001it [01:06, 300.84it/s, env_step=9980000, len=2080, n/ep=0, n/st=400, pursuer_0/loss=1.696, pursuer_1/loss=1.648, pursuer_2/loss=1.904, pursuer_3/loss=1.802, pursuer_4/loss=1.760, pursuer_5/loss=1.852, pursuer_6/loss=2.036, pursuer_7/loss=2.011, rew=522.62]                           


Epoch #499: test_reward: 560.977333 ± 202.116619, best_reward: 588.328083 ± 134.371802 in #498


Epoch #500: 20001it [01:06, 300.11it/s, env_step=10000000, len=816, n/ep=1, n/st=400, pursuer_0/loss=1.800, pursuer_1/loss=1.898, pursuer_2/loss=2.065, pursuer_3/loss=1.969, pursuer_4/loss=1.928, pursuer_5/loss=1.930, pursuer_6/loss=1.971, pursuer_7/loss=2.003, rew=626.69]                           


Steps Policy Saved  4450
Epoch #500: test_reward: 494.508750 ± 123.489400, best_reward: 588.328083 ± 134.371802 in #498

{'duration': '79077.89s', 'train_time/model': '19926.51s', 'test_step': 43588296, 'test_episode': 15030, 'test_time': '47048.52s', 'test_speed': '926.45 step/s', 'best_reward': 588.3280833333332, 'best_result': '588.33 ± 134.37', 'train_step': 10000000, 'train_episode': 4388, 'train_time/collector': '12102.86s', 'train_speed': '312.21 step/s'}

(the trained policy can be accessed via policy.policies[agents[0]])


In [3]:
# Persist the first agent's weights; the file name is suffixed with the
# current value of global_step_holder[0] so successive saves do not collide.
torch.save(
    policy.policies[agents[0]].state_dict(),
    f"{model_save_path}_{global_step_holder[0]}.pth",
)
print("Steps Policy Saved ", global_step_holder[0])
            

Steps Policy Saved  4451


In [4]:
def _get_envT():
    """Build one TaskPursuit environment wrapped for use with Tianshou.

    DummyVectorEnv takes a list of zero-argument callables, so this factory
    is handed to it directly. Every setting is read from the module-level
    SISL_Config dict, and rendering is switched off.
    """
    # Settings copied one-to-one from SISL_Config into TaskPursuitEnv.env().
    # "att_memory" is currently disabled and therefore not forwarded.
    forwarded_keys = (
        "max_cycles",
        "x_size",
        "y_size",
        "shared_reward",
        "n_evaders",
        "n_pursuers",
        "obs_range",
        "n_catch",
        "freeze_evaders",
        "tag_reward",
        "catch_reward",
        "urgency_reward",
        "surround",
        "constraint_window",
    )
    env_kwargs = {key: SISL_Config[key] for key in forwarded_keys}

    # render_mode=None keeps the run headless ("human" / "html" were the
    # alternatives tried previously).
    raw_env = TaskPursuitEnv.env(**env_kwargs, render_mode=None)

    return PettingZooEnv(raw_env)


# Evaluation cell: build the agents, create the test environment(s), seed the
# RNGs, put every agent policy into evaluation mode and collect `episodes`
# episodes with the custom collector, then print reward and length summaries.
#
# NOTE(review): the notebook output recorded for this cell is a RuntimeError
# from permute (input.dim() = 3 vs len(dims) = 4). Presumably it is raised
# inside the policy network during collect() -- confirm which call fails.
policy, optim, agents = _get_agents()
test_env_num = 1  # overrides the earlier test_env_num; a single env is used here
# ======== Step 1: Environment setup =========

# One _get_envT factory per test environment.
test_envs = DummyVectorEnv([_get_envT for _ in range(test_env_num)]) 

# Seed NumPy, PyTorch and the vectorized test environments with the same value.
seed = 100
np.random.seed(seed)

torch.manual_seed(seed)
test_envs.seed(seed)

episodes =  1  # number of episodes passed to collector.collect()
render  = False  # when True, collect() is called with render=0.02
# Prepare every agent's policy for evaluation.
# NOTE(review): the checkpoint load below is commented out, so the weights
# used are whatever _get_agents() returned -- confirm this is intended.
for agent in agents:    
    
    # Previous per-agent checkpoint path selection (disabled):
    # if Policy_Config["same_policy"]:
    #     model_path = os.path.join("dqn_SISL", name + ".pth")                            
    # else:
    #     model_path = os.path.join("dqn_SISL", name + agent + ".pth") 

    # Epsilon 0 -- presumably disables random exploration for this policy.
    policy.policies[agent].set_eps(0.00)
    # policy.policies[agent].load_state_dict(torch.load(model_load_path))
    policy.policies[agent].eval()
    
# envs = DummyVectorEnv([_get_env for _ in range(1)])

collector = CustomCollector(policy, test_envs, exploration_noise=False)

# render receives 0.02 only when `render` is truthy, otherwise None.
results = collector.collect(n_episode=episodes, render=0.02 if render else None)  # previously tried: render=0.02, gym_reset_kwargs={'seed': 2}

# Sum results['rews'] along axis 1 -- presumably one total per episode across
# agents; verify against CustomCollector's return format.
print("FinalRew: ", np.sum(results['rews'], axis = 1))
print("Finished: ", results['lens'] , " Steps")



RuntimeError: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 3 is not equal to len(dims) = 4