In [1]:
from logging import config
import os
import datetime
from typing import Optional, Tuple
import json

os.environ["WANDB_NOTEBOOK_NAME"] = "Tianshow_Centralized_Training"

import numpy as np
import torch
from tianshou.data import Collector, VectorReplayBuffer, PrioritizedVectorReplayBuffer
from tianshou.env import DummyVectorEnv
from tianshou.env.pettingzoo_env import PettingZooEnv
from tianshou.policy import BasePolicy, DQNPolicy, MultiAgentPolicyManager, RandomPolicy, RainbowPolicy
from tianshou.trainer import OffpolicyTrainer
from torch.utils.tensorboard import SummaryWriter

from pettingzoo.sisl import pursuit_v4

from TaskAllocation.RL_Policies.MultiHead_SISL import MultiHead_SISL
from TaskAllocation.RL_Policies.DNN_SISL import DNN_SISL
from TaskAllocation.RL_Policies.CNN_SISL import CNN_SISL

from TaskAllocation.RL_Policies.Custom_Classes import CustomNet
from TaskAllocation.RL_Policies.Custom_Classes import CustomCollector
from TaskAllocation.RL_Policies.Custom_Classes import CustomParallelToAECWrapper

from tianshou.utils import WandbLogger


# from tianshou_DQN import train

# ---- Experiment selection -------------------------------------------------
model = "CNN_SISL"  # network architecture; alternative: "MultiHead_SISL"
test_num = "_SISL_NOV12_Emb128"
policyModel = "DQN"

# Number of parallel environments used for training and for evaluation.
train_env_num = 10
test_env_num = 10

name = model + test_num

# ---- Checkpoint locations --------------------------------------------------
load_policy_name = 'policy_CNN_SISL_SISL_NOV12_Emb128.pth'
save_policy_name = f'policy_{name}'
policy_path = "dqn_SISL"

same_policy = True   # all agents share one policy object
load_model = False   # when True, a saved checkpoint is loaded before training

# ---- Logging ---------------------------------------------------------------
# A timestamp suffix keeps every run in its own log directory.
now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
log_name = name + now

log_path = os.path.join('./', "Logs", "dqn_sisl", log_name)

# ---- Environment configuration (keyword arguments of pursuit_v4.env) ------
SISL_Config = {
    "max_cycles": 500,         # default: 500
    "x_size": 16,              # default: 16
    "y_size": 16,              # default: 16
    "shared_reward": False,    # default: True
    "n_evaders": 30,           # default: 30
    "n_pursuers": 8,           # default: 10
    "obs_range": 7,            # default: 7
    "n_catch": 2,              # default: 2
    "freeze_evaders": False,   # default: False
    "tag_reward": 0.01,        # default: 0.01
    "catch_reward": 5.0,       # default: 5.0
    "urgency_reward": -0.1,    # default: -0.1
    "surround": True,          # default: True
    "constraint_window": 1.0,  # default: 1.0
}

max_cycles = SISL_Config["max_cycles"]
n_agents = SISL_Config["n_pursuers"]

# ---- Learner hyper-parameters ----------------------------------------------
# NOTE: the key "optminizer" is misspelled; it is kept as-is so the logged
# run configuration stays comparable with earlier runs.
dqn_params = {
    "discount_factor": 0.98,
    "estimation_step": 20,
    "target_update_freq": 1000,  # max_cycles * n_agents,
    "optminizer": "Adam",
    "lr": 0.00016,
}

# ---- Trainer hyper-parameters ----------------------------------------------
trainer_params = {
    "max_epoch": 500,
    "step_per_epoch": 20000,   # 5 * (150 * n_agents),
    "step_per_collect": 400,   # * (10 * n_agents),
    "episode_per_test": 20,
    "batch_size": 32 * n_agents,
    # Only runs after a collect finishes (as many times as needed to meet the value).
    "update_per_step": 1 / 50,
    "tn_eps_max": 0.1,         # starting epsilon while training
    "ts_eps_max": 0.01,        # epsilon used while testing
}

# Merge into a NEW dict.  The previous `runConfig = dqn_params` aliased the
# learner settings, so the subsequent updates silently polluted dqn_params
# with every trainer and environment key.
runConfig = {**dqn_params, **trainer_params, **SISL_Config}

model_load_path = os.path.join(policy_path, load_policy_name)
model_save_path = os.path.join(policy_path, save_policy_name)
os.makedirs(policy_path, exist_ok=True)
os.makedirs(log_path, exist_ok=True)

def _get_agents(
    agent_learn: Optional[BasePolicy] = None,
    agent_opponent: Optional[BasePolicy] = None,
    optim: Optional[torch.optim.Optimizer] = None,
    policy_load_path: Optional[str] = None,
) -> Tuple[BasePolicy, torch.optim.Optimizer, list]:
    """Build the multi-agent policy manager for the pursuit environment.

    When ``agent_learn`` is not supplied, a network is created according to
    the module-level ``model`` setting and wrapped in the policy selected by
    ``policyModel``.  The same policy object is then assigned to every agent
    of the environment.

    Args:
        agent_learn: Ready-made policy to share between all agents.  When
            ``None`` a new one is constructed.
        agent_opponent: Accepted for interface compatibility; not used.
        optim: Optimizer for a newly constructed network.  When ``None`` an
            Adam optimizer with ``dqn_params["lr"]`` is created.
        policy_load_path: Checkpoint to restore when ``load_model`` is set.
            Falls back to the module-level ``model_load_path`` when ``None``.

    Returns:
        ``(policy, optim, agent_ids)`` where ``policy`` is the
        ``MultiAgentPolicyManager``, ``optim`` is the optimizer in use
        (``None`` if ``agent_learn`` was supplied without one) and
        ``agent_ids`` is ``env.agents``.

    Raises:
        ValueError: If ``model`` or ``policyModel`` names an unsupported option.
    """
    env = _get_env()
    agent_observation_space = env.observation_space
    action_space = env.action_space

    device = "cuda" if torch.cuda.is_available() else "cpu"

    if agent_learn is None:
        # --- network ---------------------------------------------------
        # Note: CNN_SISL receives the observation *shape*, the other two
        # architectures receive the observation space object itself.
        if model == "MultiHead_SISL":
            net = MultiHead_SISL(
                obs_shape=agent_observation_space,
                action_shape=5,
                device=device,
            ).to(device)
        elif model == "DNN_SISL":
            net = DNN_SISL(
                obs_shape=agent_observation_space,
                action_shape=5,
                device=device,
            ).to(device)
        elif model == "CNN_SISL":
            net = CNN_SISL(
                obs_shape=agent_observation_space.shape,
                action_shape=5,
                device=device,
            ).to(device)
        else:
            # Previously an unknown name left `net` unbound and failed later
            # with an unrelated-looking error.
            raise ValueError(f"Unsupported model: {model!r}")

        if optim is None:
            optim = torch.optim.Adam(
                net.parameters(),
                lr=dqn_params["lr"],
                weight_decay=0.0,
                amsgrad=True,
            )

        # --- policy ----------------------------------------------------
        if policyModel == "DQN":
            agent_learn = DQNPolicy(
                model=net,
                optim=optim,
                action_space=action_space,
                discount_factor=dqn_params["discount_factor"],
                estimation_step=dqn_params["estimation_step"],
                target_update_freq=dqn_params["target_update_freq"],
                reward_normalization=False,
                clip_loss_grad=False,
            )
        elif policyModel == "Rainbow":
            agent_learn = RainbowPolicy(
                model=net,  # already moved to `device` above
                optim=optim,
                action_space=action_space,
                num_atoms=5,
                discount_factor=dqn_params["discount_factor"],
                estimation_step=dqn_params["estimation_step"],
                target_update_freq=dqn_params["target_update_freq"],
            )
        else:
            raise ValueError(f"Unsupported policyModel: {policyModel!r}")

        if load_model:
            # Honour the explicit argument; fall back to the module default.
            checkpoint_path = (
                policy_load_path if policy_load_path is not None else model_load_path
            )
            # map_location lets a checkpoint saved on GPU load on a CPU-only host.
            agent_learn.load_state_dict(
                torch.load(checkpoint_path, map_location=device)
            )
            print(f'Loaded-> {checkpoint_path}')

    # Built outside the branch above so that a caller-supplied `agent_learn`
    # works too (it used to raise NameError because `agents` was undefined).
    agents = [agent_learn for _ in range(len(env.agents))]

    policy = MultiAgentPolicyManager(policies=agents, env=env)

    return policy, optim, env.agents

def _get_env():
    """Create one pursuit environment wrapped for Tianshou.

    This zero-argument factory is what ``DummyVectorEnv`` needs as a
    callable.  The environment is configured entirely from the module-level
    ``SISL_Config`` dictionary.

    Returns:
        A ``PettingZooEnv`` wrapping a freshly created ``pursuit_v4`` env.
    """
    # SISL_Config contains exactly the keyword arguments that used to be
    # spelled out one by one here, so it can be unpacked directly.
    # The second, unreachable copy of this function body that followed the
    # `return` (with different hard-coded settings) has been removed.
    env = pursuit_v4.env(**SISL_Config)

    return PettingZooEnv(env)

# Print the merged run configuration as indented JSON.
print(json.dumps(runConfig, indent=4))


{
    "discount_factor": 0.98,
    "estimation_step": 20,
    "target_update_freq": 1000,
    "optminizer": "Adam",
    "lr": 0.00016,
    "max_epoch": 500,
    "step_per_epoch": 20000,
    "step_per_collect": 400,
    "episode_per_test": 20,
    "batch_size": 256,
    "update_per_step": 0.02,
    "tn_eps_max": 0.1,
    "ts_eps_max": 0.01,
    "max_cycles": 500,
    "x_size": 16,
    "y_size": 16,
    "shared_reward": false,
    "n_evaders": 30,
    "n_pursuers": 8,
    "obs_range": 7,
    "n_catch": 2,
    "freeze_evaders": false,
    "tag_reward": 0.01,
    "catch_reward": 5.0,
    "urgency_reward": -0.1,
    "surround": true,
    "constraint_window": 1.0
}


In [2]:
if __name__ == "__main__":
    # Training entry point: builds the vectorized environments, the shared
    # policy, the collectors and the logger, then runs the off-policy trainer.

    torch.set_grad_enabled(True)

    # ======== Step 1: Environment setup =========
    train_envs = DummyVectorEnv([_get_env for _ in range(train_env_num)])
    test_envs = DummyVectorEnv([_get_env for _ in range(test_env_num)])

    # Seed NumPy, PyTorch and both vectorized environments.
    seed = 0
    np.random.seed(seed)
    torch.manual_seed(seed)
    train_envs.seed(seed)
    test_envs.seed(seed)

    # ======== Step 2: Agent setup =========
    policy, optim, agents = _get_agents()

    # ======== Step 3: Collector setup =========
    train_collector = Collector(
        policy,
        train_envs,
        VectorReplayBuffer(300_000, len(train_envs)),
        # PrioritizedVectorReplayBuffer(300_000, len(train_envs), alpha=0.6, beta=0.4),
        exploration_noise=True,
    )
    test_collector = Collector(policy, test_envs, exploration_noise=True)

    # Warm-up: collect one episode per training environment before training.
    print("Buffer Warming Up ")
    train_collector.collect(n_episode=train_env_num)
    print(".", end="")

    # Buffer size expressed in units of (max_cycles * n_pursuers) transitions.
    len_buffer = len(train_collector.buffer) / (SISL_Config["max_cycles"] * SISL_Config["n_pursuers"])
    print("\nBuffer Lenght: ", len_buffer)

    info = {"Buffer": "ReplayBuffer", " Warmup_ep": len_buffer}

    # ======== Logging setup (Weights & Biases on top of a TensorBoard writer) =========
    logger = WandbLogger(
        train_interval=runConfig["max_cycles"] * runConfig["n_pursuers"],
        test_interval=1,  # runConfig["max_cycles"] * runConfig["n_pursuers"],
        update_interval=runConfig["max_cycles"],
        save_interval=1,
        write_flush=True,
        project="SISL_Eval01",
        name=log_name,
        entity=None,
        run_id=log_name,
        config=runConfig,
        monitor_gym=True,
    )

    writer = SummaryWriter(log_path)
    writer.add_text("args", str(runConfig))
    logger.load(writer)

    # ======== Step 4: Callback functions setup =========
    def _set_eps(epsilon):
        """Apply ``epsilon`` to the learning policies.

        With a shared policy a single call is enough.  Otherwise every agent
        id returned by ``_get_agents`` is updated; the former hard-coded
        'R_agent0' / 'F_agent0' keys are not agents of this environment and
        would have raised KeyError.
        """
        targets = agents[:1] if same_policy else agents
        for agent_id in targets:
            policy.policies[agent_id].set_eps(epsilon)

    def save_best_fn(policy):
        """Save the first agent's policy weights to ``model_save_path``."""
        torch.save(policy.policies[agents[0]].state_dict(), model_save_path + ".pth")
        print("Best Saved")

    def stop_fn(mean_rewards):
        """Stop once the mean reward reaches the (very high) threshold."""
        return mean_rewards >= 99999939.0

    def train_fn(epoch, env_step):
        """Linearly decay epsilon from tn_eps_max towards tn_eps_max / 100 over max_epoch."""
        eps_max = trainer_params['tn_eps_max']
        epsilon = eps_max - (eps_max - eps_max / 100) * (epoch / trainer_params['max_epoch'])
        _set_eps(epsilon)

    def test_fn(epoch, env_step):
        """Use the fixed test-time epsilon."""
        _set_eps(trainer_params['ts_eps_max'])

    def reward_metric(rews):
        """Sum the rewards over axis 1 (one column per agent — TODO confirm)."""
        return np.sum(rews, axis=1)

    # ======== Step 5: Run the trainer =========
    offPolicyTrainer = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=trainer_params['max_epoch'],
        step_per_epoch=trainer_params['step_per_epoch'],
        step_per_collect=trainer_params['step_per_collect'],
        episode_per_test=trainer_params['episode_per_test'],
        batch_size=trainer_params['batch_size'],
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        update_per_step=trainer_params['update_per_step'],
        logger=logger,
        test_in_train=True,
        reward_metric=reward_metric,
        show_progress=True,
    )

    # Close the writer even if training is interrupted or fails, so buffered
    # events are not lost.
    try:
        result = offPolicyTrainer.run()
    finally:
        writer.close()

    # return result, policy.policies[agents[1]]
    print(f"\n==========Result==========\n{result}")
    print("\n(the trained policy can be accessed via policy.policies[agents[0]])")



Buffer Warming Up 




.
Buffer Lenght:  10.0


[34m[1mwandb[0m: Currently logged in as: [33mandrekuros[0m. Use [1m`wandb login --relogin`[0m to force relogin
  from IPython.core.display import HTML, display  # type: ignore


Best Saved


Epoch #1: 20001it [01:07, 295.80it/s, env_step=20000, len=0, n/ep=0, n/st=400, pursuer_0/loss=0.065, pursuer_1/loss=0.034, pursuer_2/loss=0.037, pursuer_3/loss=0.034, pursuer_4/loss=0.045, pursuer_5/loss=0.071, pursuer_6/loss=0.036, pursuer_7/loss=0.058, rew=0.00]                           


Best Saved
Epoch #1: test_reward: -336.240375 ± 34.408222, best_reward: -336.240375 ± 34.408222 in #1


Epoch #2: 20001it [01:02, 318.19it/s, env_step=40000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.153, pursuer_1/loss=0.076, pursuer_2/loss=0.066, pursuer_3/loss=0.171, pursuer_4/loss=0.144, pursuer_5/loss=0.131, pursuer_6/loss=0.174, pursuer_7/loss=0.253, rew=-236.77]                           


Best Saved
Epoch #2: test_reward: -151.779063 ± 63.985415, best_reward: -151.779063 ± 63.985415 in #2


Epoch #3: 20001it [01:03, 313.53it/s, env_step=60000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.141, pursuer_1/loss=0.210, pursuer_2/loss=0.210, pursuer_3/loss=0.196, pursuer_4/loss=0.267, pursuer_5/loss=0.334, pursuer_6/loss=0.231, pursuer_7/loss=0.308, rew=-236.77]                           


Best Saved
Epoch #3: test_reward: -116.444250 ± 44.926807, best_reward: -116.444250 ± 44.926807 in #3


Epoch #4: 20001it [01:00, 330.83it/s, env_step=80000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.185, pursuer_1/loss=0.261, pursuer_2/loss=0.156, pursuer_3/loss=0.232, pursuer_4/loss=0.318, pursuer_5/loss=0.325, pursuer_6/loss=0.270, pursuer_7/loss=0.362, rew=-90.35]                           


Best Saved
Epoch #4: test_reward: 40.350687 ± 76.332140, best_reward: 40.350687 ± 76.332140 in #4


Epoch #5: 20001it [01:02, 317.69it/s, env_step=100000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.254, pursuer_1/loss=0.297, pursuer_2/loss=0.322, pursuer_3/loss=0.331, pursuer_4/loss=0.379, pursuer_5/loss=0.417, pursuer_6/loss=0.386, pursuer_7/loss=0.402, rew=-90.35]                           


Epoch #5: test_reward: -39.847313 ± 60.653988, best_reward: 40.350687 ± 76.332140 in #4


Epoch #6: 20001it [01:01, 323.48it/s, env_step=120000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.225, pursuer_1/loss=0.265, pursuer_2/loss=0.258, pursuer_3/loss=0.321, pursuer_4/loss=0.379, pursuer_5/loss=0.442, pursuer_6/loss=0.373, pursuer_7/loss=0.417, rew=-2.71]                           


Epoch #6: test_reward: -20.006625 ± 67.601233, best_reward: 40.350687 ± 76.332140 in #4


Epoch #7: 20001it [01:04, 309.00it/s, env_step=140000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.361, pursuer_1/loss=0.337, pursuer_2/loss=0.396, pursuer_3/loss=0.315, pursuer_4/loss=0.409, pursuer_5/loss=0.449, pursuer_6/loss=0.450, pursuer_7/loss=0.461, rew=-2.71]                           


Epoch #7: test_reward: -65.185500 ± 71.156741, best_reward: 40.350687 ± 76.332140 in #4


Epoch #8: 20001it [01:01, 325.89it/s, env_step=160000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.347, pursuer_1/loss=0.338, pursuer_2/loss=0.395, pursuer_3/loss=0.418, pursuer_4/loss=0.474, pursuer_5/loss=0.486, pursuer_6/loss=0.463, pursuer_7/loss=0.531, rew=21.69]                           


Epoch #8: test_reward: 11.838875 ± 74.806771, best_reward: 40.350687 ± 76.332140 in #4


Epoch #9: 20001it [01:03, 313.00it/s, env_step=180000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.395, pursuer_1/loss=0.456, pursuer_2/loss=0.498, pursuer_3/loss=0.452, pursuer_4/loss=0.594, pursuer_5/loss=0.584, pursuer_6/loss=0.505, pursuer_7/loss=0.673, rew=21.69]                           


Epoch #9: test_reward: -13.345188 ± 56.762957, best_reward: 40.350687 ± 76.332140 in #4


Epoch #10: 20001it [00:57, 350.10it/s, env_step=200000, len=4000, n/ep=10, n/st=400, pursuer_0/loss=0.413, pursuer_1/loss=0.368, pursuer_2/loss=0.515, pursuer_3/loss=0.482, pursuer_4/loss=0.513, pursuer_5/loss=0.611, pursuer_6/loss=0.573, pursuer_7/loss=0.569, rew=94.52]                           


Best Saved
Epoch #10: test_reward: 140.479250 ± 94.230302, best_reward: 140.479250 ± 94.230302 in #10


Epoch #11: 20001it [01:03, 317.45it/s, env_step=220000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.414, pursuer_1/loss=0.470, pursuer_2/loss=0.494, pursuer_3/loss=0.498, pursuer_4/loss=0.560, pursuer_5/loss=0.628, pursuer_6/loss=0.627, pursuer_7/loss=0.578, rew=94.52]                           


Epoch #11: test_reward: 60.949062 ± 59.503894, best_reward: 140.479250 ± 94.230302 in #10


Epoch #12: 20001it [00:59, 335.55it/s, env_step=240000, len=4000, n/ep=7, n/st=400, pursuer_0/loss=0.414, pursuer_1/loss=0.407, pursuer_2/loss=0.431, pursuer_3/loss=0.468, pursuer_4/loss=0.591, pursuer_5/loss=0.569, pursuer_6/loss=0.563, pursuer_7/loss=0.592, rew=105.66]                           


Epoch #12: test_reward: 89.133750 ± 86.813714, best_reward: 140.479250 ± 94.230302 in #10


Epoch #13: 20001it [01:00, 331.08it/s, env_step=260000, len=1968, n/ep=1, n/st=400, pursuer_0/loss=0.513, pursuer_1/loss=0.513, pursuer_2/loss=0.518, pursuer_3/loss=0.524, pursuer_4/loss=0.673, pursuer_5/loss=0.547, pursuer_6/loss=0.619, pursuer_7/loss=0.692, rew=326.08]                           


Epoch #13: test_reward: 34.542000 ± 67.706211, best_reward: 140.479250 ± 94.230302 in #10


Epoch #14: 20001it [00:55, 357.91it/s, env_step=280000, len=4000, n/ep=5, n/st=400, pursuer_0/loss=0.509, pursuer_1/loss=0.583, pursuer_2/loss=0.585, pursuer_3/loss=0.649, pursuer_4/loss=0.644, pursuer_5/loss=0.681, pursuer_6/loss=0.695, pursuer_7/loss=0.749, rew=134.27]                           


Epoch #14: test_reward: 86.305187 ± 78.374402, best_reward: 140.479250 ± 94.230302 in #10


Epoch #15: 20001it [00:54, 364.48it/s, env_step=300000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=0.620, pursuer_1/loss=0.718, pursuer_2/loss=0.667, pursuer_3/loss=0.652, pursuer_4/loss=0.736, pursuer_5/loss=0.899, pursuer_6/loss=0.815, pursuer_7/loss=0.858, rew=53.04]                            


Epoch #15: test_reward: 43.845500 ± 48.344626, best_reward: 140.479250 ± 94.230302 in #10


Epoch #16: 20001it [00:55, 361.67it/s, env_step=320000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=0.697, pursuer_1/loss=0.895, pursuer_2/loss=0.738, pursuer_3/loss=0.801, pursuer_4/loss=0.820, pursuer_5/loss=0.963, pursuer_6/loss=0.894, pursuer_7/loss=1.041, rew=132.01]                           


Epoch #16: test_reward: 97.584687 ± 89.650391, best_reward: 140.479250 ± 94.230302 in #10


Epoch #17: 20001it [00:57, 349.99it/s, env_step=340000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=0.755, pursuer_1/loss=0.801, pursuer_2/loss=0.747, pursuer_3/loss=0.770, pursuer_4/loss=0.804, pursuer_5/loss=0.854, pursuer_6/loss=0.864, pursuer_7/loss=1.009, rew=42.72]                            


Epoch #17: test_reward: 87.828187 ± 66.827613, best_reward: 140.479250 ± 94.230302 in #10


Epoch #18: 20001it [00:57, 348.96it/s, env_step=360000, len=2016, n/ep=1, n/st=400, pursuer_0/loss=0.830, pursuer_1/loss=0.747, pursuer_2/loss=0.793, pursuer_3/loss=0.894, pursuer_4/loss=0.965, pursuer_5/loss=0.994, pursuer_6/loss=0.973, pursuer_7/loss=1.025, rew=308.63]                           


Epoch #18: test_reward: 63.624312 ± 45.158286, best_reward: 140.479250 ± 94.230302 in #10


Epoch #19: 20001it [00:58, 344.05it/s, env_step=380000, len=3620, n/ep=0, n/st=400, pursuer_0/loss=0.825, pursuer_1/loss=0.749, pursuer_2/loss=0.833, pursuer_3/loss=0.867, pursuer_4/loss=1.065, pursuer_5/loss=0.887, pursuer_6/loss=0.937, pursuer_7/loss=0.961, rew=133.15]                           


Epoch #19: test_reward: 66.571875 ± 102.039767, best_reward: 140.479250 ± 94.230302 in #10


Epoch #20: 20001it [00:58, 344.11it/s, env_step=400000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=0.853, pursuer_1/loss=0.771, pursuer_2/loss=0.804, pursuer_3/loss=0.813, pursuer_4/loss=0.937, pursuer_5/loss=0.867, pursuer_6/loss=0.843, pursuer_7/loss=0.997, rew=96.49]                            


Epoch #20: test_reward: 75.453625 ± 89.807467, best_reward: 140.479250 ± 94.230302 in #10


Epoch #21: 20001it [00:59, 334.32it/s, env_step=420000, len=1840, n/ep=0, n/st=400, pursuer_0/loss=0.851, pursuer_1/loss=0.948, pursuer_2/loss=0.871, pursuer_3/loss=1.041, pursuer_4/loss=1.034, pursuer_5/loss=1.047, pursuer_6/loss=1.001, pursuer_7/loss=1.115, rew=347.19]                           


Epoch #21: test_reward: 3.154125 ± 56.556491, best_reward: 140.479250 ± 94.230302 in #10


Epoch #22: 20001it [00:58, 344.34it/s, env_step=440000, len=1464, n/ep=0, n/st=400, pursuer_0/loss=0.807, pursuer_1/loss=0.728, pursuer_2/loss=0.817, pursuer_3/loss=0.889, pursuer_4/loss=0.927, pursuer_5/loss=0.899, pursuer_6/loss=0.981, pursuer_7/loss=0.985, rew=334.10]                           


Epoch #22: test_reward: 121.630562 ± 113.781615, best_reward: 140.479250 ± 94.230302 in #10


Epoch #23: 20001it [00:57, 344.97it/s, env_step=460000, len=3920, n/ep=0, n/st=400, pursuer_0/loss=0.848, pursuer_1/loss=0.900, pursuer_2/loss=0.807, pursuer_3/loss=0.918, pursuer_4/loss=1.081, pursuer_5/loss=1.011, pursuer_6/loss=0.986, pursuer_7/loss=0.982, rew=31.13]                           


Epoch #23: test_reward: 129.302312 ± 189.032893, best_reward: 140.479250 ± 94.230302 in #10


Epoch #24: 20001it [00:58, 343.31it/s, env_step=480000, len=2944, n/ep=0, n/st=400, pursuer_0/loss=0.957, pursuer_1/loss=0.851, pursuer_2/loss=0.777, pursuer_3/loss=0.939, pursuer_4/loss=0.960, pursuer_5/loss=0.911, pursuer_6/loss=1.015, pursuer_7/loss=0.964, rew=288.67]                           


Epoch #24: test_reward: 112.201500 ± 106.211541, best_reward: 140.479250 ± 94.230302 in #10


Epoch #25: 20001it [00:58, 344.00it/s, env_step=500000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.823, pursuer_1/loss=0.789, pursuer_2/loss=0.859, pursuer_3/loss=0.877, pursuer_4/loss=0.929, pursuer_5/loss=0.915, pursuer_6/loss=0.982, pursuer_7/loss=0.979, rew=196.75]                           


Epoch #25: test_reward: 97.464062 ± 108.638354, best_reward: 140.479250 ± 94.230302 in #10


Epoch #26: 20001it [01:00, 333.32it/s, env_step=520000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.872, pursuer_1/loss=0.935, pursuer_2/loss=0.895, pursuer_3/loss=1.093, pursuer_4/loss=0.968, pursuer_5/loss=0.994, pursuer_6/loss=1.133, pursuer_7/loss=1.220, rew=26.33]                           


Epoch #26: test_reward: 24.987250 ± 75.057225, best_reward: 140.479250 ± 94.230302 in #10


Epoch #27: 20001it [00:57, 347.71it/s, env_step=540000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.865, pursuer_1/loss=1.037, pursuer_2/loss=1.003, pursuer_3/loss=1.102, pursuer_4/loss=0.976, pursuer_5/loss=0.949, pursuer_6/loss=1.045, pursuer_7/loss=0.950, rew=26.62]                            


Epoch #27: test_reward: 97.317125 ± 86.935032, best_reward: 140.479250 ± 94.230302 in #10


Epoch #28: 20001it [00:59, 336.06it/s, env_step=560000, len=2328, n/ep=0, n/st=400, pursuer_0/loss=0.890, pursuer_1/loss=0.928, pursuer_2/loss=0.905, pursuer_3/loss=1.037, pursuer_4/loss=1.042, pursuer_5/loss=1.066, pursuer_6/loss=1.061, pursuer_7/loss=1.129, rew=289.36]                           


Epoch #28: test_reward: 30.838562 ± 71.677289, best_reward: 140.479250 ± 94.230302 in #10


Epoch #29: 20001it [00:57, 345.88it/s, env_step=580000, len=3696, n/ep=0, n/st=400, pursuer_0/loss=0.885, pursuer_1/loss=0.930, pursuer_2/loss=1.162, pursuer_3/loss=0.946, pursuer_4/loss=1.014, pursuer_5/loss=0.999, pursuer_6/loss=1.128, pursuer_7/loss=1.001, rew=149.53]                           


Epoch #29: test_reward: 52.856062 ± 108.429016, best_reward: 140.479250 ± 94.230302 in #10


Epoch #30: 20001it [01:00, 332.75it/s, env_step=600000, len=1784, n/ep=0, n/st=400, pursuer_0/loss=0.968, pursuer_1/loss=1.016, pursuer_2/loss=0.981, pursuer_3/loss=1.160, pursuer_4/loss=0.953, pursuer_5/loss=1.032, pursuer_6/loss=1.190, pursuer_7/loss=1.148, rew=391.79]                           


Best Saved
Epoch #30: test_reward: 185.799000 ± 126.518256, best_reward: 185.799000 ± 126.518256 in #30


Epoch #31: 20001it [00:57, 349.53it/s, env_step=620000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.896, pursuer_1/loss=0.986, pursuer_2/loss=0.878, pursuer_3/loss=0.992, pursuer_4/loss=0.917, pursuer_5/loss=1.086, pursuer_6/loss=1.122, pursuer_7/loss=1.001, rew=112.83]                           


Epoch #31: test_reward: 22.428812 ± 60.211579, best_reward: 185.799000 ± 126.518256 in #30


Epoch #32: 20001it [01:00, 329.31it/s, env_step=640000, len=2096, n/ep=0, n/st=400, pursuer_0/loss=0.903, pursuer_1/loss=1.032, pursuer_2/loss=0.981, pursuer_3/loss=1.007, pursuer_4/loss=0.953, pursuer_5/loss=1.047, pursuer_6/loss=1.044, pursuer_7/loss=1.090, rew=302.60]                           


Epoch #32: test_reward: 11.748500 ± 61.115983, best_reward: 185.799000 ± 126.518256 in #30


Epoch #33: 20001it [00:58, 343.86it/s, env_step=660000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.825, pursuer_1/loss=0.946, pursuer_2/loss=0.933, pursuer_3/loss=0.873, pursuer_4/loss=0.965, pursuer_5/loss=0.912, pursuer_6/loss=0.859, pursuer_7/loss=1.006, rew=97.17]                            


Epoch #33: test_reward: 160.022187 ± 124.558807, best_reward: 185.799000 ± 126.518256 in #30


Epoch #34: 20001it [00:57, 345.92it/s, env_step=680000, len=1824, n/ep=0, n/st=400, pursuer_0/loss=0.922, pursuer_1/loss=1.048, pursuer_2/loss=1.015, pursuer_3/loss=0.970, pursuer_4/loss=0.928, pursuer_5/loss=1.125, pursuer_6/loss=1.088, pursuer_7/loss=1.031, rew=319.03]                           


Epoch #34: test_reward: 96.997937 ± 94.047810, best_reward: 185.799000 ± 126.518256 in #30


Epoch #35: 20001it [00:57, 349.78it/s, env_step=700000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.871, pursuer_1/loss=0.958, pursuer_2/loss=0.961, pursuer_3/loss=1.117, pursuer_4/loss=1.030, pursuer_5/loss=1.038, pursuer_6/loss=1.166, pursuer_7/loss=1.056, rew=40.60]                            


Epoch #35: test_reward: 148.404625 ± 127.976617, best_reward: 185.799000 ± 126.518256 in #30


Epoch #36: 20001it [00:57, 348.54it/s, env_step=720000, len=3952, n/ep=0, n/st=400, pursuer_0/loss=0.868, pursuer_1/loss=0.950, pursuer_2/loss=1.018, pursuer_3/loss=0.981, pursuer_4/loss=0.920, pursuer_5/loss=1.058, pursuer_6/loss=1.045, pursuer_7/loss=0.976, rew=184.54]                           


Epoch #36: test_reward: 47.665937 ± 65.080454, best_reward: 185.799000 ± 126.518256 in #30


Epoch #37: 20001it [01:01, 327.52it/s, env_step=740000, len=3184, n/ep=1, n/st=400, pursuer_0/loss=0.814, pursuer_1/loss=1.013, pursuer_2/loss=0.908, pursuer_3/loss=0.888, pursuer_4/loss=0.955, pursuer_5/loss=1.034, pursuer_6/loss=0.908, pursuer_7/loss=0.991, rew=169.96]                           


Epoch #37: test_reward: 81.901750 ± 67.003241, best_reward: 185.799000 ± 126.518256 in #30


Epoch #38: 20001it [00:58, 341.70it/s, env_step=760000, len=3480, n/ep=0, n/st=400, pursuer_0/loss=0.894, pursuer_1/loss=0.984, pursuer_2/loss=1.033, pursuer_3/loss=0.932, pursuer_4/loss=1.016, pursuer_5/loss=1.171, pursuer_6/loss=1.100, pursuer_7/loss=1.014, rew=152.92]                           


Epoch #38: test_reward: 111.967500 ± 159.493778, best_reward: 185.799000 ± 126.518256 in #30


Epoch #39: 20001it [00:59, 338.18it/s, env_step=780000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=0.874, pursuer_1/loss=1.055, pursuer_2/loss=1.020, pursuer_3/loss=1.067, pursuer_4/loss=0.935, pursuer_5/loss=0.916, pursuer_6/loss=1.203, pursuer_7/loss=1.090, rew=92.00]                            


Epoch #39: test_reward: 42.455000 ± 120.442497, best_reward: 185.799000 ± 126.518256 in #30


Epoch #40: 20001it [01:01, 327.31it/s, env_step=800000, len=3304, n/ep=0, n/st=400, pursuer_0/loss=0.844, pursuer_1/loss=0.917, pursuer_2/loss=0.953, pursuer_3/loss=1.018, pursuer_4/loss=0.956, pursuer_5/loss=1.012, pursuer_6/loss=1.003, pursuer_7/loss=0.981, rew=250.69]                           


Epoch #40: test_reward: 110.363437 ± 121.855389, best_reward: 185.799000 ± 126.518256 in #30


Epoch #41: 20001it [01:01, 326.59it/s, env_step=820000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=0.816, pursuer_1/loss=0.825, pursuer_2/loss=0.972, pursuer_3/loss=0.980, pursuer_4/loss=0.868, pursuer_5/loss=0.942, pursuer_6/loss=1.021, pursuer_7/loss=1.040, rew=57.17]                            


Epoch #41: test_reward: 14.213437 ± 46.922113, best_reward: 185.799000 ± 126.518256 in #30


Epoch #42: 20001it [01:02, 321.25it/s, env_step=840000, len=2328, n/ep=0, n/st=400, pursuer_0/loss=1.047, pursuer_1/loss=1.072, pursuer_2/loss=1.113, pursuer_3/loss=1.077, pursuer_4/loss=1.167, pursuer_5/loss=1.053, pursuer_6/loss=1.265, pursuer_7/loss=1.127, rew=164.71]                           


Epoch #42: test_reward: 121.622750 ± 92.588917, best_reward: 185.799000 ± 126.518256 in #30


Epoch #43: 20001it [00:59, 337.45it/s, env_step=860000, len=3492, n/ep=0, n/st=400, pursuer_0/loss=0.893, pursuer_1/loss=1.065, pursuer_2/loss=1.048, pursuer_3/loss=0.967, pursuer_4/loss=0.965, pursuer_5/loss=1.039, pursuer_6/loss=1.015, pursuer_7/loss=1.115, rew=153.01]                           


Epoch #43: test_reward: 38.150125 ± 60.733101, best_reward: 185.799000 ± 126.518256 in #30


Epoch #44: 20001it [00:58, 340.38it/s, env_step=880000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.019, pursuer_1/loss=1.001, pursuer_2/loss=1.078, pursuer_3/loss=1.189, pursuer_4/loss=1.026, pursuer_5/loss=1.087, pursuer_6/loss=1.169, pursuer_7/loss=1.089, rew=45.53]                            


Epoch #44: test_reward: 98.625062 ± 92.151919, best_reward: 185.799000 ± 126.518256 in #30


Epoch #45: 20001it [00:59, 338.67it/s, env_step=900000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.857, pursuer_1/loss=1.004, pursuer_2/loss=1.003, pursuer_3/loss=0.958, pursuer_4/loss=0.913, pursuer_5/loss=1.038, pursuer_6/loss=1.202, pursuer_7/loss=1.113, rew=222.51]                           


Epoch #45: test_reward: 76.258875 ± 42.092987, best_reward: 185.799000 ± 126.518256 in #30


Epoch #46: 20001it [00:57, 349.00it/s, env_step=920000, len=1408, n/ep=0, n/st=400, pursuer_0/loss=0.924, pursuer_1/loss=0.954, pursuer_2/loss=1.049, pursuer_3/loss=1.143, pursuer_4/loss=0.884, pursuer_5/loss=1.035, pursuer_6/loss=1.045, pursuer_7/loss=1.109, rew=390.19]                           


Epoch #46: test_reward: 38.879937 ± 71.472560, best_reward: 185.799000 ± 126.518256 in #30


Epoch #47: 20001it [00:56, 353.57it/s, env_step=940000, len=1224, n/ep=0, n/st=400, pursuer_0/loss=0.943, pursuer_1/loss=1.041, pursuer_2/loss=1.100, pursuer_3/loss=1.137, pursuer_4/loss=0.965, pursuer_5/loss=1.041, pursuer_6/loss=1.144, pursuer_7/loss=1.066, rew=486.99]                           


Epoch #47: test_reward: 112.245937 ± 113.176097, best_reward: 185.799000 ± 126.518256 in #30


Epoch #48: 20001it [00:59, 335.51it/s, env_step=960000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.962, pursuer_1/loss=0.976, pursuer_2/loss=1.032, pursuer_3/loss=1.190, pursuer_4/loss=1.008, pursuer_5/loss=1.042, pursuer_6/loss=1.185, pursuer_7/loss=1.140, rew=77.22]                            


Epoch #48: test_reward: 139.179125 ± 122.239943, best_reward: 185.799000 ± 126.518256 in #30


Epoch #49: 20001it [01:00, 327.98it/s, env_step=980000, len=1536, n/ep=0, n/st=400, pursuer_0/loss=0.981, pursuer_1/loss=0.974, pursuer_2/loss=1.135, pursuer_3/loss=1.211, pursuer_4/loss=0.879, pursuer_5/loss=1.111, pursuer_6/loss=1.180, pursuer_7/loss=1.250, rew=312.39]                           


Epoch #49: test_reward: 162.878000 ± 144.460196, best_reward: 185.799000 ± 126.518256 in #30


Epoch #50: 20001it [00:59, 335.17it/s, env_step=1000000, len=1208, n/ep=0, n/st=400, pursuer_0/loss=1.026, pursuer_1/loss=1.007, pursuer_2/loss=0.965, pursuer_3/loss=1.078, pursuer_4/loss=0.945, pursuer_5/loss=1.019, pursuer_6/loss=1.120, pursuer_7/loss=1.173, rew=399.22]                           


Epoch #50: test_reward: 110.009562 ± 110.292578, best_reward: 185.799000 ± 126.518256 in #30


Epoch #51: 20001it [00:58, 342.62it/s, env_step=1020000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.928, pursuer_1/loss=1.119, pursuer_2/loss=1.197, pursuer_3/loss=1.186, pursuer_4/loss=0.986, pursuer_5/loss=1.019, pursuer_6/loss=1.128, pursuer_7/loss=1.260, rew=64.76]                            


Best Saved
Epoch #51: test_reward: 201.790875 ± 125.953629, best_reward: 201.790875 ± 125.953629 in #51


Epoch #52: 20001it [00:57, 347.81it/s, env_step=1040000, len=2536, n/ep=0, n/st=400, pursuer_0/loss=0.996, pursuer_1/loss=1.125, pursuer_2/loss=1.189, pursuer_3/loss=1.178, pursuer_4/loss=1.009, pursuer_5/loss=1.207, pursuer_6/loss=1.134, pursuer_7/loss=1.261, rew=251.69]                           


Epoch #52: test_reward: 56.404125 ± 75.983410, best_reward: 201.790875 ± 125.953629 in #51


Epoch #53: 20001it [00:58, 339.76it/s, env_step=1060000, len=1976, n/ep=0, n/st=400, pursuer_0/loss=1.002, pursuer_1/loss=1.168, pursuer_2/loss=1.130, pursuer_3/loss=1.305, pursuer_4/loss=0.994, pursuer_5/loss=1.161, pursuer_6/loss=1.237, pursuer_7/loss=1.318, rew=417.85]                           


Epoch #53: test_reward: 123.073687 ± 94.820867, best_reward: 201.790875 ± 125.953629 in #51


Epoch #54: 20001it [00:59, 333.59it/s, env_step=1080000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.791, pursuer_1/loss=0.958, pursuer_2/loss=1.073, pursuer_3/loss=1.245, pursuer_4/loss=0.970, pursuer_5/loss=1.115, pursuer_6/loss=1.250, pursuer_7/loss=1.278, rew=155.12]                           


Epoch #54: test_reward: 108.994250 ± 124.447719, best_reward: 201.790875 ± 125.953629 in #51


Epoch #55: 20001it [00:59, 335.00it/s, env_step=1100000, len=2616, n/ep=0, n/st=400, pursuer_0/loss=1.051, pursuer_1/loss=1.072, pursuer_2/loss=1.132, pursuer_3/loss=1.163, pursuer_4/loss=1.033, pursuer_5/loss=1.223, pursuer_6/loss=1.275, pursuer_7/loss=1.295, rew=208.94]                           


Best Saved
Epoch #55: test_reward: 217.651125 ± 178.301593, best_reward: 217.651125 ± 178.301593 in #55


Epoch #56: 20001it [00:57, 345.82it/s, env_step=1120000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.939, pursuer_1/loss=1.082, pursuer_2/loss=1.102, pursuer_3/loss=1.186, pursuer_4/loss=1.070, pursuer_5/loss=1.142, pursuer_6/loss=1.032, pursuer_7/loss=1.225, rew=32.76]                            


Epoch #56: test_reward: 34.272250 ± 76.922662, best_reward: 217.651125 ± 178.301593 in #55


Epoch #57: 20001it [00:57, 346.81it/s, env_step=1140000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.988, pursuer_1/loss=1.151, pursuer_2/loss=0.984, pursuer_3/loss=1.327, pursuer_4/loss=1.004, pursuer_5/loss=1.175, pursuer_6/loss=1.177, pursuer_7/loss=1.140, rew=78.34]                            


Epoch #57: test_reward: 120.518625 ± 113.802516, best_reward: 217.651125 ± 178.301593 in #55


Epoch #58: 20001it [00:59, 337.36it/s, env_step=1160000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.870, pursuer_1/loss=1.101, pursuer_2/loss=1.020, pursuer_3/loss=1.112, pursuer_4/loss=1.028, pursuer_5/loss=1.223, pursuer_6/loss=1.056, pursuer_7/loss=1.265, rew=11.09]                            


Epoch #58: test_reward: 40.208250 ± 78.894969, best_reward: 217.651125 ± 178.301593 in #55


Epoch #59: 20001it [00:59, 338.84it/s, env_step=1180000, len=2816, n/ep=0, n/st=400, pursuer_0/loss=1.064, pursuer_1/loss=1.122, pursuer_2/loss=1.152, pursuer_3/loss=1.205, pursuer_4/loss=1.159, pursuer_5/loss=1.141, pursuer_6/loss=1.260, pursuer_7/loss=1.194, rew=243.35]                           


Epoch #59: test_reward: 155.057687 ± 135.786199, best_reward: 217.651125 ± 178.301593 in #55


Epoch #60: 20001it [00:58, 342.76it/s, env_step=1200000, len=2648, n/ep=0, n/st=400, pursuer_0/loss=0.949, pursuer_1/loss=0.894, pursuer_2/loss=1.138, pursuer_3/loss=1.113, pursuer_4/loss=0.951, pursuer_5/loss=0.987, pursuer_6/loss=1.196, pursuer_7/loss=1.099, rew=422.01]                           


Epoch #60: test_reward: 110.678187 ± 98.304654, best_reward: 217.651125 ± 178.301593 in #55


Epoch #61: 20001it [00:58, 340.06it/s, env_step=1220000, len=3120, n/ep=1, n/st=400, pursuer_0/loss=0.922, pursuer_1/loss=1.055, pursuer_2/loss=1.088, pursuer_3/loss=1.147, pursuer_4/loss=0.986, pursuer_5/loss=1.114, pursuer_6/loss=1.222, pursuer_7/loss=1.189, rew=251.04]                           


Epoch #61: test_reward: 103.732750 ± 74.431196, best_reward: 217.651125 ± 178.301593 in #55


Epoch #62: 20001it [00:59, 336.75it/s, env_step=1240000, len=3008, n/ep=0, n/st=400, pursuer_0/loss=0.922, pursuer_1/loss=0.971, pursuer_2/loss=1.110, pursuer_3/loss=1.194, pursuer_4/loss=0.955, pursuer_5/loss=1.028, pursuer_6/loss=1.079, pursuer_7/loss=1.130, rew=249.88]                           


Epoch #62: test_reward: 5.083562 ± 64.053328, best_reward: 217.651125 ± 178.301593 in #55


Epoch #63: 20001it [00:58, 341.82it/s, env_step=1260000, len=1848, n/ep=0, n/st=400, pursuer_0/loss=0.901, pursuer_1/loss=1.024, pursuer_2/loss=1.063, pursuer_3/loss=1.146, pursuer_4/loss=1.138, pursuer_5/loss=1.112, pursuer_6/loss=1.057, pursuer_7/loss=1.256, rew=281.08]                           


Best Saved
Epoch #63: test_reward: 244.216250 ± 146.477934, best_reward: 244.216250 ± 146.477934 in #63


Epoch #64: 20001it [00:56, 351.07it/s, env_step=1280000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.008, pursuer_1/loss=1.011, pursuer_2/loss=1.049, pursuer_3/loss=1.177, pursuer_4/loss=1.033, pursuer_5/loss=1.113, pursuer_6/loss=1.095, pursuer_7/loss=1.146, rew=94.94]                            


Epoch #64: test_reward: 88.574437 ± 101.618788, best_reward: 244.216250 ± 146.477934 in #63


Epoch #65: 20001it [00:55, 357.74it/s, env_step=1300000, len=1588, n/ep=0, n/st=400, pursuer_0/loss=0.899, pursuer_1/loss=1.089, pursuer_2/loss=0.998, pursuer_3/loss=1.077, pursuer_4/loss=0.991, pursuer_5/loss=1.109, pursuer_6/loss=1.173, pursuer_7/loss=1.088, rew=389.05]                           


Epoch #65: test_reward: 165.914062 ± 137.020080, best_reward: 244.216250 ± 146.477934 in #63


Epoch #66: 20001it [00:58, 341.29it/s, env_step=1320000, len=1880, n/ep=0, n/st=400, pursuer_0/loss=0.936, pursuer_1/loss=0.993, pursuer_2/loss=1.150, pursuer_3/loss=0.908, pursuer_4/loss=0.979, pursuer_5/loss=0.948, pursuer_6/loss=1.072, pursuer_7/loss=0.987, rew=352.88]                           


Epoch #66: test_reward: 31.752187 ± 58.742288, best_reward: 244.216250 ± 146.477934 in #63


Epoch #67: 20001it [00:55, 362.14it/s, env_step=1340000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.943, pursuer_1/loss=1.067, pursuer_2/loss=1.173, pursuer_3/loss=1.092, pursuer_4/loss=1.067, pursuer_5/loss=1.059, pursuer_6/loss=1.078, pursuer_7/loss=1.055, rew=101.96]                           


Epoch #67: test_reward: 43.739937 ± 78.103980, best_reward: 244.216250 ± 146.477934 in #63


Epoch #68: 20001it [00:55, 360.11it/s, env_step=1360000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.990, pursuer_1/loss=1.061, pursuer_2/loss=1.215, pursuer_3/loss=1.062, pursuer_4/loss=1.083, pursuer_5/loss=1.147, pursuer_6/loss=1.098, pursuer_7/loss=1.268, rew=109.51]                           


Epoch #68: test_reward: 64.503312 ± 95.738547, best_reward: 244.216250 ± 146.477934 in #63


Epoch #69: 20001it [00:57, 348.56it/s, env_step=1380000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.896, pursuer_1/loss=0.911, pursuer_2/loss=1.045, pursuer_3/loss=1.050, pursuer_4/loss=0.971, pursuer_5/loss=1.004, pursuer_6/loss=1.067, pursuer_7/loss=1.093, rew=116.42]                           


Epoch #69: test_reward: 123.644937 ± 120.379162, best_reward: 244.216250 ± 146.477934 in #63


Epoch #70: 20001it [00:56, 356.66it/s, env_step=1400000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.953, pursuer_1/loss=0.900, pursuer_2/loss=1.025, pursuer_3/loss=1.018, pursuer_4/loss=0.989, pursuer_5/loss=0.936, pursuer_6/loss=1.061, pursuer_7/loss=1.093, rew=46.08]                            


Epoch #70: test_reward: 135.542812 ± 123.056327, best_reward: 244.216250 ± 146.477934 in #63


Epoch #71: 20001it [00:56, 356.51it/s, env_step=1420000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.897, pursuer_1/loss=0.906, pursuer_2/loss=0.989, pursuer_3/loss=1.057, pursuer_4/loss=0.926, pursuer_5/loss=0.968, pursuer_6/loss=0.948, pursuer_7/loss=1.056, rew=59.62]                            


Epoch #71: test_reward: 79.592250 ± 66.233850, best_reward: 244.216250 ± 146.477934 in #63


Epoch #72: 20001it [00:57, 345.61it/s, env_step=1440000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.960, pursuer_1/loss=0.957, pursuer_2/loss=1.012, pursuer_3/loss=1.050, pursuer_4/loss=0.956, pursuer_5/loss=0.974, pursuer_6/loss=0.871, pursuer_7/loss=1.106, rew=123.35]                           


Epoch #72: test_reward: 129.155812 ± 126.537760, best_reward: 244.216250 ± 146.477934 in #63


Epoch #73: 20001it [00:56, 355.93it/s, env_step=1460000, len=3272, n/ep=0, n/st=400, pursuer_0/loss=0.928, pursuer_1/loss=0.914, pursuer_2/loss=1.045, pursuer_3/loss=1.049, pursuer_4/loss=0.962, pursuer_5/loss=0.910, pursuer_6/loss=1.005, pursuer_7/loss=1.047, rew=139.64]                           


Epoch #73: test_reward: 60.081687 ± 80.525448, best_reward: 244.216250 ± 146.477934 in #63


Epoch #74: 20001it [00:58, 339.33it/s, env_step=1480000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.970, pursuer_1/loss=0.920, pursuer_2/loss=1.018, pursuer_3/loss=1.001, pursuer_4/loss=1.079, pursuer_5/loss=0.977, pursuer_6/loss=0.998, pursuer_7/loss=1.126, rew=76.78]                            


Epoch #74: test_reward: 137.332062 ± 106.957661, best_reward: 244.216250 ± 146.477934 in #63


Epoch #75: 20001it [00:57, 350.31it/s, env_step=1500000, len=1264, n/ep=0, n/st=400, pursuer_0/loss=0.898, pursuer_1/loss=0.950, pursuer_2/loss=0.935, pursuer_3/loss=1.045, pursuer_4/loss=1.000, pursuer_5/loss=1.017, pursuer_6/loss=0.881, pursuer_7/loss=1.047, rew=403.82]                           


Epoch #75: test_reward: 141.264500 ± 147.841314, best_reward: 244.216250 ± 146.477934 in #63


Epoch #76: 20001it [00:57, 349.13it/s, env_step=1520000, len=1312, n/ep=0, n/st=400, pursuer_0/loss=0.991, pursuer_1/loss=1.033, pursuer_2/loss=1.123, pursuer_3/loss=1.040, pursuer_4/loss=1.076, pursuer_5/loss=1.079, pursuer_6/loss=1.029, pursuer_7/loss=1.211, rew=458.09]                           


Epoch #76: test_reward: 71.119625 ± 74.215555, best_reward: 244.216250 ± 146.477934 in #63


Epoch #77: 20001it [00:57, 345.24it/s, env_step=1540000, len=3280, n/ep=0, n/st=400, pursuer_0/loss=0.834, pursuer_1/loss=0.852, pursuer_2/loss=0.961, pursuer_3/loss=1.028, pursuer_4/loss=1.063, pursuer_5/loss=0.965, pursuer_6/loss=1.036, pursuer_7/loss=1.121, rew=217.08]                           


Epoch #77: test_reward: 134.901875 ± 137.593199, best_reward: 244.216250 ± 146.477934 in #63


Epoch #78: 20001it [00:57, 347.62it/s, env_step=1560000, len=2656, n/ep=0, n/st=400, pursuer_0/loss=0.951, pursuer_1/loss=1.022, pursuer_2/loss=1.040, pursuer_3/loss=0.999, pursuer_4/loss=0.980, pursuer_5/loss=0.870, pursuer_6/loss=0.967, pursuer_7/loss=1.089, rew=237.20]                           


Epoch #78: test_reward: 152.941312 ± 108.181336, best_reward: 244.216250 ± 146.477934 in #63


Epoch #79: 20001it [00:56, 351.74it/s, env_step=1580000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.939, pursuer_1/loss=0.872, pursuer_2/loss=0.985, pursuer_3/loss=1.020, pursuer_4/loss=1.055, pursuer_5/loss=0.946, pursuer_6/loss=0.968, pursuer_7/loss=1.119, rew=177.15]                           


Epoch #79: test_reward: 110.837000 ± 105.094054, best_reward: 244.216250 ± 146.477934 in #63


Epoch #80: 20001it [00:57, 346.08it/s, env_step=1600000, len=1704, n/ep=0, n/st=400, pursuer_0/loss=0.879, pursuer_1/loss=0.906, pursuer_2/loss=1.046, pursuer_3/loss=1.058, pursuer_4/loss=1.054, pursuer_5/loss=0.999, pursuer_6/loss=0.919, pursuer_7/loss=0.998, rew=429.58]                           


Epoch #80: test_reward: 66.209125 ± 83.648856, best_reward: 244.216250 ± 146.477934 in #63


Epoch #81: 20001it [00:55, 359.03it/s, env_step=1620000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.033, pursuer_1/loss=0.807, pursuer_2/loss=1.015, pursuer_3/loss=1.086, pursuer_4/loss=1.072, pursuer_5/loss=0.904, pursuer_6/loss=0.960, pursuer_7/loss=1.058, rew=145.50]                           


Epoch #81: test_reward: 69.049500 ± 126.400531, best_reward: 244.216250 ± 146.477934 in #63


Epoch #82: 20001it [00:58, 342.61it/s, env_step=1640000, len=1616, n/ep=0, n/st=400, pursuer_0/loss=0.990, pursuer_1/loss=0.790, pursuer_2/loss=0.976, pursuer_3/loss=0.981, pursuer_4/loss=0.948, pursuer_5/loss=1.019, pursuer_6/loss=1.049, pursuer_7/loss=1.069, rew=389.90]                           


Epoch #82: test_reward: 67.743375 ± 107.605268, best_reward: 244.216250 ± 146.477934 in #63


Epoch #83: 20001it [00:55, 357.79it/s, env_step=1660000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.934, pursuer_1/loss=0.905, pursuer_2/loss=0.991, pursuer_3/loss=1.151, pursuer_4/loss=1.025, pursuer_5/loss=0.899, pursuer_6/loss=1.094, pursuer_7/loss=1.145, rew=50.66]                            


Epoch #83: test_reward: 60.174187 ± 97.958204, best_reward: 244.216250 ± 146.477934 in #63


Epoch #84: 20001it [00:57, 350.70it/s, env_step=1680000, len=1144, n/ep=0, n/st=400, pursuer_0/loss=0.934, pursuer_1/loss=0.944, pursuer_2/loss=0.981, pursuer_3/loss=1.133, pursuer_4/loss=1.028, pursuer_5/loss=1.026, pursuer_6/loss=1.012, pursuer_7/loss=1.124, rew=399.99]                           


Epoch #84: test_reward: 15.835187 ± 88.292675, best_reward: 244.216250 ± 146.477934 in #63


Epoch #85: 20001it [00:56, 352.03it/s, env_step=1700000, len=1568, n/ep=0, n/st=400, pursuer_0/loss=0.883, pursuer_1/loss=0.930, pursuer_2/loss=1.076, pursuer_3/loss=1.188, pursuer_4/loss=1.001, pursuer_5/loss=0.941, pursuer_6/loss=1.137, pursuer_7/loss=0.999, rew=403.59]                           


Epoch #85: test_reward: 93.493687 ± 117.426460, best_reward: 244.216250 ± 146.477934 in #63


Epoch #86: 20001it [00:56, 354.05it/s, env_step=1720000, len=2016, n/ep=0, n/st=400, pursuer_0/loss=0.763, pursuer_1/loss=0.784, pursuer_2/loss=1.012, pursuer_3/loss=1.043, pursuer_4/loss=1.017, pursuer_5/loss=0.987, pursuer_6/loss=1.053, pursuer_7/loss=0.975, rew=312.94]                           


Epoch #86: test_reward: 25.271625 ± 92.175809, best_reward: 244.216250 ± 146.477934 in #63


Epoch #87: 20001it [00:57, 345.98it/s, env_step=1740000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.888, pursuer_1/loss=0.930, pursuer_2/loss=1.022, pursuer_3/loss=0.970, pursuer_4/loss=0.854, pursuer_5/loss=0.981, pursuer_6/loss=0.968, pursuer_7/loss=1.068, rew=147.87]                           


Epoch #87: test_reward: 189.937125 ± 140.334313, best_reward: 244.216250 ± 146.477934 in #63


Epoch #88: 20001it [00:57, 348.82it/s, env_step=1760000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.914, pursuer_1/loss=0.967, pursuer_2/loss=0.971, pursuer_3/loss=1.118, pursuer_4/loss=1.057, pursuer_5/loss=0.969, pursuer_6/loss=1.063, pursuer_7/loss=1.030, rew=75.50]                            


Epoch #88: test_reward: 88.812562 ± 88.020727, best_reward: 244.216250 ± 146.477934 in #63


Epoch #89: 20001it [00:56, 352.66it/s, env_step=1780000, len=3032, n/ep=0, n/st=400, pursuer_0/loss=0.863, pursuer_1/loss=0.860, pursuer_2/loss=0.936, pursuer_3/loss=1.101, pursuer_4/loss=0.960, pursuer_5/loss=1.019, pursuer_6/loss=1.055, pursuer_7/loss=1.007, rew=189.49]                           


Best Saved
Epoch #89: test_reward: 265.721812 ± 149.947418, best_reward: 265.721812 ± 149.947418 in #89


Epoch #90: 20001it [00:57, 346.19it/s, env_step=1800000, len=2728, n/ep=0, n/st=400, pursuer_0/loss=1.015, pursuer_1/loss=1.054, pursuer_2/loss=1.021, pursuer_3/loss=1.165, pursuer_4/loss=0.969, pursuer_5/loss=1.035, pursuer_6/loss=1.088, pursuer_7/loss=1.125, rew=183.13]                           


Epoch #90: test_reward: 117.540312 ± 96.360354, best_reward: 265.721812 ± 149.947418 in #89


Epoch #91: 20001it [00:57, 347.08it/s, env_step=1820000, len=2504, n/ep=0, n/st=400, pursuer_0/loss=0.936, pursuer_1/loss=1.033, pursuer_2/loss=0.970, pursuer_3/loss=1.202, pursuer_4/loss=1.058, pursuer_5/loss=0.997, pursuer_6/loss=1.136, pursuer_7/loss=1.202, rew=314.84]                           


Epoch #91: test_reward: 171.837812 ± 148.797051, best_reward: 265.721812 ± 149.947418 in #89


Epoch #92: 20001it [00:57, 349.20it/s, env_step=1840000, len=2880, n/ep=0, n/st=400, pursuer_0/loss=0.927, pursuer_1/loss=0.964, pursuer_2/loss=1.101, pursuer_3/loss=1.082, pursuer_4/loss=0.925, pursuer_5/loss=1.077, pursuer_6/loss=1.059, pursuer_7/loss=1.122, rew=178.33]                           


Epoch #92: test_reward: 54.485312 ± 122.099150, best_reward: 265.721812 ± 149.947418 in #89


Epoch #93: 20001it [00:58, 343.85it/s, env_step=1860000, len=2304, n/ep=0, n/st=400, pursuer_0/loss=1.067, pursuer_1/loss=1.000, pursuer_2/loss=0.994, pursuer_3/loss=1.113, pursuer_4/loss=1.126, pursuer_5/loss=1.183, pursuer_6/loss=1.063, pursuer_7/loss=1.195, rew=290.21]                           


Epoch #93: test_reward: 98.499250 ± 84.309298, best_reward: 265.721812 ± 149.947418 in #89


Epoch #94: 20001it [00:58, 343.99it/s, env_step=1880000, len=1832, n/ep=1, n/st=400, pursuer_0/loss=0.958, pursuer_1/loss=1.055, pursuer_2/loss=1.150, pursuer_3/loss=1.151, pursuer_4/loss=1.084, pursuer_5/loss=1.053, pursuer_6/loss=1.047, pursuer_7/loss=1.107, rew=448.86]                           


Epoch #94: test_reward: 193.616500 ± 136.573900, best_reward: 265.721812 ± 149.947418 in #89


Epoch #95: 20001it [00:58, 342.97it/s, env_step=1900000, len=1208, n/ep=0, n/st=400, pursuer_0/loss=0.935, pursuer_1/loss=1.028, pursuer_2/loss=1.106, pursuer_3/loss=1.132, pursuer_4/loss=1.076, pursuer_5/loss=1.148, pursuer_6/loss=1.138, pursuer_7/loss=1.318, rew=412.51]                           


Best Saved
Epoch #95: test_reward: 303.294188 ± 168.545598, best_reward: 303.294188 ± 168.545598 in #95


Epoch #96: 20001it [00:57, 346.59it/s, env_step=1920000, len=2432, n/ep=0, n/st=400, pursuer_0/loss=0.913, pursuer_1/loss=1.077, pursuer_2/loss=1.013, pursuer_3/loss=1.213, pursuer_4/loss=1.087, pursuer_5/loss=1.111, pursuer_6/loss=1.144, pursuer_7/loss=1.167, rew=330.10]                           


Epoch #96: test_reward: 236.891187 ± 103.523471, best_reward: 303.294188 ± 168.545598 in #95


Epoch #97: 20001it [00:57, 346.38it/s, env_step=1940000, len=2976, n/ep=0, n/st=400, pursuer_0/loss=1.042, pursuer_1/loss=1.009, pursuer_2/loss=1.052, pursuer_3/loss=1.283, pursuer_4/loss=1.146, pursuer_5/loss=1.104, pursuer_6/loss=1.101, pursuer_7/loss=1.313, rew=213.99]                           


Epoch #97: test_reward: 216.997000 ± 184.118110, best_reward: 303.294188 ± 168.545598 in #95


Epoch #98: 20001it [00:58, 343.30it/s, env_step=1960000, len=2316, n/ep=2, n/st=400, pursuer_0/loss=1.030, pursuer_1/loss=1.191, pursuer_2/loss=1.367, pursuer_3/loss=1.290, pursuer_4/loss=1.211, pursuer_5/loss=1.342, pursuer_6/loss=1.339, pursuer_7/loss=1.376, rew=292.14]                           


Epoch #98: test_reward: 70.252062 ± 76.093497, best_reward: 303.294188 ± 168.545598 in #95


Epoch #99: 20001it [00:57, 348.32it/s, env_step=1980000, len=3792, n/ep=0, n/st=400, pursuer_0/loss=1.074, pursuer_1/loss=1.117, pursuer_2/loss=1.095, pursuer_3/loss=1.297, pursuer_4/loss=0.995, pursuer_5/loss=1.177, pursuer_6/loss=1.205, pursuer_7/loss=1.217, rew=122.65]                           


Epoch #99: test_reward: 107.595812 ± 65.005345, best_reward: 303.294188 ± 168.545598 in #95


Epoch #100: 20001it [00:58, 341.01it/s, env_step=2000000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.024, pursuer_1/loss=1.093, pursuer_2/loss=1.228, pursuer_3/loss=1.153, pursuer_4/loss=1.121, pursuer_5/loss=1.079, pursuer_6/loss=1.248, pursuer_7/loss=1.315, rew=42.30]                            


Epoch #100: test_reward: 204.750500 ± 151.578253, best_reward: 303.294188 ± 168.545598 in #95


Epoch #101: 20001it [00:56, 355.21it/s, env_step=2020000, len=3064, n/ep=0, n/st=400, pursuer_0/loss=1.144, pursuer_1/loss=1.262, pursuer_2/loss=1.282, pursuer_3/loss=1.384, pursuer_4/loss=1.224, pursuer_5/loss=1.286, pursuer_6/loss=1.295, pursuer_7/loss=1.313, rew=494.10]                           


Epoch #101: test_reward: 49.516125 ± 101.152142, best_reward: 303.294188 ± 168.545598 in #95


Epoch #102: 20001it [00:57, 349.88it/s, env_step=2040000, len=2456, n/ep=0, n/st=400, pursuer_0/loss=1.092, pursuer_1/loss=1.124, pursuer_2/loss=1.285, pursuer_3/loss=1.376, pursuer_4/loss=0.993, pursuer_5/loss=1.320, pursuer_6/loss=1.328, pursuer_7/loss=1.301, rew=449.39]                           


Epoch #102: test_reward: 279.076937 ± 211.812478, best_reward: 303.294188 ± 168.545598 in #95


Epoch #103: 20001it [00:56, 351.94it/s, env_step=2060000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.116, pursuer_1/loss=1.186, pursuer_2/loss=1.300, pursuer_3/loss=1.148, pursuer_4/loss=1.226, pursuer_5/loss=1.169, pursuer_6/loss=1.249, pursuer_7/loss=1.329, rew=70.11]                            


Epoch #103: test_reward: 302.704625 ± 234.263585, best_reward: 303.294188 ± 168.545598 in #95


Epoch #104: 20001it [00:58, 344.23it/s, env_step=2080000, len=3944, n/ep=0, n/st=400, pursuer_0/loss=0.950, pursuer_1/loss=1.038, pursuer_2/loss=1.204, pursuer_3/loss=1.255, pursuer_4/loss=1.088, pursuer_5/loss=1.335, pursuer_6/loss=1.154, pursuer_7/loss=1.219, rew=227.68]                           


Epoch #104: test_reward: 246.036125 ± 132.949626, best_reward: 303.294188 ± 168.545598 in #95


Epoch #105: 20001it [00:58, 344.78it/s, env_step=2100000, len=1768, n/ep=0, n/st=400, pursuer_0/loss=1.026, pursuer_1/loss=1.142, pursuer_2/loss=1.188, pursuer_3/loss=1.309, pursuer_4/loss=1.106, pursuer_5/loss=1.241, pursuer_6/loss=1.276, pursuer_7/loss=1.437, rew=353.51]                           


Epoch #105: test_reward: 42.666375 ± 91.592319, best_reward: 303.294188 ± 168.545598 in #95


Epoch #106: 20001it [00:56, 353.25it/s, env_step=2120000, len=1544, n/ep=0, n/st=400, pursuer_0/loss=1.055, pursuer_1/loss=1.112, pursuer_2/loss=1.195, pursuer_3/loss=1.361, pursuer_4/loss=1.206, pursuer_5/loss=1.118, pursuer_6/loss=1.262, pursuer_7/loss=1.297, rew=405.12]                           


Epoch #106: test_reward: 268.907750 ± 191.417478, best_reward: 303.294188 ± 168.545598 in #95


Epoch #107: 20001it [00:56, 355.00it/s, env_step=2140000, len=1872, n/ep=0, n/st=400, pursuer_0/loss=1.087, pursuer_1/loss=1.092, pursuer_2/loss=1.149, pursuer_3/loss=1.263, pursuer_4/loss=1.160, pursuer_5/loss=1.193, pursuer_6/loss=1.235, pursuer_7/loss=1.317, rew=403.54]                           


Epoch #107: test_reward: 189.457187 ± 107.502761, best_reward: 303.294188 ± 168.545598 in #95


Epoch #108: 20001it [00:57, 350.11it/s, env_step=2160000, len=3480, n/ep=0, n/st=400, pursuer_0/loss=1.092, pursuer_1/loss=1.040, pursuer_2/loss=1.222, pursuer_3/loss=1.170, pursuer_4/loss=1.088, pursuer_5/loss=1.153, pursuer_6/loss=1.189, pursuer_7/loss=1.259, rew=221.31]                           


Epoch #108: test_reward: 232.287062 ± 200.837540, best_reward: 303.294188 ± 168.545598 in #95


Epoch #109: 20001it [00:57, 346.52it/s, env_step=2180000, len=2840, n/ep=0, n/st=400, pursuer_0/loss=0.999, pursuer_1/loss=0.933, pursuer_2/loss=0.985, pursuer_3/loss=1.089, pursuer_4/loss=0.961, pursuer_5/loss=1.102, pursuer_6/loss=1.232, pursuer_7/loss=1.187, rew=235.58]                           


Epoch #109: test_reward: 287.040312 ± 138.933391, best_reward: 303.294188 ± 168.545598 in #95


Epoch #110: 20001it [00:58, 344.29it/s, env_step=2200000, len=1888, n/ep=0, n/st=400, pursuer_0/loss=1.076, pursuer_1/loss=1.063, pursuer_2/loss=1.233, pursuer_3/loss=1.264, pursuer_4/loss=1.210, pursuer_5/loss=1.176, pursuer_6/loss=1.408, pursuer_7/loss=1.358, rew=266.69]                           


Epoch #110: test_reward: 118.563312 ± 114.401915, best_reward: 303.294188 ± 168.545598 in #95


Epoch #111: 20001it [00:57, 349.61it/s, env_step=2220000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=0.913, pursuer_1/loss=1.089, pursuer_2/loss=1.090, pursuer_3/loss=1.096, pursuer_4/loss=1.106, pursuer_5/loss=1.015, pursuer_6/loss=1.211, pursuer_7/loss=1.313, rew=150.58]                           


Epoch #111: test_reward: 257.949813 ± 101.392048, best_reward: 303.294188 ± 168.545598 in #95


Epoch #112: 20001it [00:56, 351.56it/s, env_step=2240000, len=1632, n/ep=1, n/st=400, pursuer_0/loss=1.107, pursuer_1/loss=0.999, pursuer_2/loss=1.241, pursuer_3/loss=1.163, pursuer_4/loss=1.252, pursuer_5/loss=1.101, pursuer_6/loss=1.123, pursuer_7/loss=1.283, rew=405.60]                           


Epoch #112: test_reward: 262.091125 ± 145.432633, best_reward: 303.294188 ± 168.545598 in #95


Epoch #113: 20001it [00:56, 353.77it/s, env_step=2260000, len=1912, n/ep=0, n/st=400, pursuer_0/loss=1.133, pursuer_1/loss=1.130, pursuer_2/loss=1.195, pursuer_3/loss=1.301, pursuer_4/loss=1.180, pursuer_5/loss=1.217, pursuer_6/loss=1.213, pursuer_7/loss=1.342, rew=268.57]                           


Epoch #113: test_reward: 170.248812 ± 125.808383, best_reward: 303.294188 ± 168.545598 in #95


Epoch #114: 20001it [00:57, 347.71it/s, env_step=2280000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.031, pursuer_1/loss=1.111, pursuer_2/loss=1.142, pursuer_3/loss=1.135, pursuer_4/loss=1.158, pursuer_5/loss=1.214, pursuer_6/loss=1.144, pursuer_7/loss=1.138, rew=162.77]                           


Epoch #114: test_reward: 254.345250 ± 146.747201, best_reward: 303.294188 ± 168.545598 in #95


Epoch #115: 20001it [00:56, 351.17it/s, env_step=2300000, len=3576, n/ep=0, n/st=400, pursuer_0/loss=1.055, pursuer_1/loss=0.973, pursuer_2/loss=1.087, pursuer_3/loss=1.030, pursuer_4/loss=1.013, pursuer_5/loss=1.134, pursuer_6/loss=1.155, pursuer_7/loss=1.230, rew=172.88]                           


Epoch #115: test_reward: 216.459625 ± 120.076624, best_reward: 303.294188 ± 168.545598 in #95


Epoch #116: 20001it [00:58, 344.36it/s, env_step=2320000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.159, pursuer_1/loss=1.116, pursuer_2/loss=1.234, pursuer_3/loss=1.335, pursuer_4/loss=1.150, pursuer_5/loss=1.139, pursuer_6/loss=1.203, pursuer_7/loss=1.398, rew=175.79]                           


Epoch #116: test_reward: 210.879375 ± 141.263100, best_reward: 303.294188 ± 168.545598 in #95


Epoch #117: 20001it [00:56, 357.01it/s, env_step=2340000, len=3864, n/ep=0, n/st=400, pursuer_0/loss=0.952, pursuer_1/loss=1.068, pursuer_2/loss=1.193, pursuer_3/loss=1.083, pursuer_4/loss=1.144, pursuer_5/loss=1.189, pursuer_6/loss=1.284, pursuer_7/loss=1.367, rew=224.95]                           


Epoch #117: test_reward: 268.829000 ± 150.091603, best_reward: 303.294188 ± 168.545598 in #95


Epoch #118: 20001it [00:58, 343.31it/s, env_step=2360000, len=2760, n/ep=0, n/st=400, pursuer_0/loss=1.090, pursuer_1/loss=1.140, pursuer_2/loss=1.095, pursuer_3/loss=1.027, pursuer_4/loss=1.100, pursuer_5/loss=1.173, pursuer_6/loss=1.094, pursuer_7/loss=1.174, rew=226.51]                           


Epoch #118: test_reward: 132.958187 ± 108.143737, best_reward: 303.294188 ± 168.545598 in #95


Epoch #119: 20001it [00:57, 345.36it/s, env_step=2380000, len=2232, n/ep=0, n/st=400, pursuer_0/loss=1.121, pursuer_1/loss=1.252, pursuer_2/loss=1.223, pursuer_3/loss=1.203, pursuer_4/loss=1.184, pursuer_5/loss=1.372, pursuer_6/loss=1.211, pursuer_7/loss=1.264, rew=312.91]                           


Epoch #119: test_reward: 220.721250 ± 151.633876, best_reward: 303.294188 ± 168.545598 in #95


Epoch #120: 20001it [00:57, 345.17it/s, env_step=2400000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.185, pursuer_1/loss=1.226, pursuer_2/loss=1.204, pursuer_3/loss=1.278, pursuer_4/loss=1.338, pursuer_5/loss=1.197, pursuer_6/loss=1.241, pursuer_7/loss=1.330, rew=117.82]                           


Epoch #120: test_reward: 130.974187 ± 102.669836, best_reward: 303.294188 ± 168.545598 in #95


Epoch #121: 20001it [00:56, 352.93it/s, env_step=2420000, len=2280, n/ep=0, n/st=400, pursuer_0/loss=1.060, pursuer_1/loss=1.047, pursuer_2/loss=1.245, pursuer_3/loss=1.281, pursuer_4/loss=1.184, pursuer_5/loss=1.137, pursuer_6/loss=1.304, pursuer_7/loss=1.314, rew=477.38]                           


Epoch #121: test_reward: 208.882375 ± 146.486215, best_reward: 303.294188 ± 168.545598 in #95


Epoch #122: 20001it [00:57, 348.89it/s, env_step=2440000, len=1552, n/ep=0, n/st=400, pursuer_0/loss=0.976, pursuer_1/loss=1.081, pursuer_2/loss=1.044, pursuer_3/loss=1.086, pursuer_4/loss=1.164, pursuer_5/loss=1.135, pursuer_6/loss=1.198, pursuer_7/loss=1.172, rew=364.74]                           


Epoch #122: test_reward: 227.863875 ± 130.495130, best_reward: 303.294188 ± 168.545598 in #95


Epoch #123: 20001it [00:57, 348.67it/s, env_step=2460000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.118, pursuer_1/loss=1.101, pursuer_2/loss=1.162, pursuer_3/loss=1.278, pursuer_4/loss=1.116, pursuer_5/loss=1.189, pursuer_6/loss=1.207, pursuer_7/loss=1.235, rew=86.14]                            


Epoch #123: test_reward: 146.864625 ± 223.624385, best_reward: 303.294188 ± 168.545598 in #95


Epoch #124: 20001it [00:55, 361.40it/s, env_step=2480000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.021, pursuer_1/loss=1.047, pursuer_2/loss=1.111, pursuer_3/loss=1.193, pursuer_4/loss=1.093, pursuer_5/loss=1.196, pursuer_6/loss=1.195, pursuer_7/loss=1.279, rew=245.12]                           


Epoch #124: test_reward: 276.911250 ± 158.377161, best_reward: 303.294188 ± 168.545598 in #95


Epoch #125: 20001it [00:58, 344.21it/s, env_step=2500000, len=1960, n/ep=0, n/st=400, pursuer_0/loss=1.004, pursuer_1/loss=1.091, pursuer_2/loss=1.172, pursuer_3/loss=1.249, pursuer_4/loss=1.142, pursuer_5/loss=1.226, pursuer_6/loss=1.229, pursuer_7/loss=1.298, rew=333.80]                           


Epoch #125: test_reward: 126.113500 ± 103.642096, best_reward: 303.294188 ± 168.545598 in #95


Epoch #126: 20001it [00:56, 352.77it/s, env_step=2520000, len=3512, n/ep=0, n/st=400, pursuer_0/loss=1.153, pursuer_1/loss=1.145, pursuer_2/loss=1.271, pursuer_3/loss=1.421, pursuer_4/loss=1.149, pursuer_5/loss=1.222, pursuer_6/loss=1.212, pursuer_7/loss=1.317, rew=262.44]                           


Best Saved
Epoch #126: test_reward: 312.891938 ± 187.000266, best_reward: 312.891938 ± 187.000266 in #126


Epoch #127: 20001it [00:56, 351.23it/s, env_step=2540000, len=1784, n/ep=0, n/st=400, pursuer_0/loss=1.166, pursuer_1/loss=1.145, pursuer_2/loss=1.273, pursuer_3/loss=1.312, pursuer_4/loss=0.936, pursuer_5/loss=1.164, pursuer_6/loss=1.394, pursuer_7/loss=1.297, rew=522.39]                           


Epoch #127: test_reward: 184.714312 ± 88.539097, best_reward: 312.891938 ± 187.000266 in #126


Epoch #128: 20001it [00:57, 350.14it/s, env_step=2560000, len=1800, n/ep=0, n/st=400, pursuer_0/loss=1.115, pursuer_1/loss=1.113, pursuer_2/loss=1.298, pursuer_3/loss=1.157, pursuer_4/loss=1.057, pursuer_5/loss=1.345, pursuer_6/loss=1.271, pursuer_7/loss=1.319, rew=365.08]                           


Epoch #128: test_reward: 102.376562 ± 163.457108, best_reward: 312.891938 ± 187.000266 in #126


Epoch #129: 20001it [00:58, 342.27it/s, env_step=2580000, len=3592, n/ep=0, n/st=400, pursuer_0/loss=1.091, pursuer_1/loss=1.224, pursuer_2/loss=1.257, pursuer_3/loss=1.373, pursuer_4/loss=1.162, pursuer_5/loss=1.301, pursuer_6/loss=1.340, pursuer_7/loss=1.296, rew=216.33]                           


Epoch #129: test_reward: 306.268375 ± 174.612735, best_reward: 312.891938 ± 187.000266 in #126


Epoch #130: 20001it [00:56, 356.25it/s, env_step=2600000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.041, pursuer_1/loss=1.189, pursuer_2/loss=1.183, pursuer_3/loss=1.264, pursuer_4/loss=1.214, pursuer_5/loss=1.221, pursuer_6/loss=1.378, pursuer_7/loss=1.453, rew=201.08]                           


Epoch #130: test_reward: 258.232625 ± 162.359689, best_reward: 312.891938 ± 187.000266 in #126


Epoch #131: 20001it [00:56, 351.66it/s, env_step=2620000, len=1104, n/ep=0, n/st=400, pursuer_0/loss=1.173, pursuer_1/loss=1.156, pursuer_2/loss=1.289, pursuer_3/loss=1.250, pursuer_4/loss=1.279, pursuer_5/loss=1.234, pursuer_6/loss=1.493, pursuer_7/loss=1.392, rew=598.91]                           


Epoch #131: test_reward: 306.852375 ± 162.140297, best_reward: 312.891938 ± 187.000266 in #126


Epoch #132: 20001it [00:55, 357.52it/s, env_step=2640000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.176, pursuer_1/loss=1.319, pursuer_2/loss=1.296, pursuer_3/loss=1.480, pursuer_4/loss=1.223, pursuer_5/loss=1.167, pursuer_6/loss=1.453, pursuer_7/loss=1.480, rew=70.42]                            


Epoch #132: test_reward: 136.972750 ± 143.144784, best_reward: 312.891938 ± 187.000266 in #126


Epoch #133: 20001it [00:57, 349.72it/s, env_step=2660000, len=1040, n/ep=0, n/st=400, pursuer_0/loss=1.158, pursuer_1/loss=1.331, pursuer_2/loss=1.211, pursuer_3/loss=1.406, pursuer_4/loss=1.202, pursuer_5/loss=1.259, pursuer_6/loss=1.347, pursuer_7/loss=1.436, rew=719.82]                           


Epoch #133: test_reward: 200.481437 ± 154.446362, best_reward: 312.891938 ± 187.000266 in #126


Epoch #134: 20001it [00:56, 352.28it/s, env_step=2680000, len=3640, n/ep=0, n/st=400, pursuer_0/loss=1.150, pursuer_1/loss=1.248, pursuer_2/loss=1.350, pursuer_3/loss=1.271, pursuer_4/loss=1.216, pursuer_5/loss=1.222, pursuer_6/loss=1.338, pursuer_7/loss=1.401, rew=280.61]                           


Best Saved
Epoch #134: test_reward: 373.474437 ± 130.806359, best_reward: 373.474437 ± 130.806359 in #134


Epoch #135: 20001it [00:56, 352.52it/s, env_step=2700000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.170, pursuer_1/loss=1.300, pursuer_2/loss=1.353, pursuer_3/loss=1.342, pursuer_4/loss=1.255, pursuer_5/loss=1.360, pursuer_6/loss=1.479, pursuer_7/loss=1.382, rew=-1.74]                            


Epoch #135: test_reward: 121.402000 ± 116.931771, best_reward: 373.474437 ± 130.806359 in #134


Epoch #136: 20001it [00:56, 350.91it/s, env_step=2720000, len=3600, n/ep=0, n/st=400, pursuer_0/loss=1.052, pursuer_1/loss=1.286, pursuer_2/loss=1.380, pursuer_3/loss=1.270, pursuer_4/loss=1.004, pursuer_5/loss=1.221, pursuer_6/loss=1.368, pursuer_7/loss=1.372, rew=149.63]                           


Epoch #136: test_reward: 37.136000 ± 78.213349, best_reward: 373.474437 ± 130.806359 in #134


Epoch #137: 20001it [00:57, 349.11it/s, env_step=2740000, len=1768, n/ep=0, n/st=400, pursuer_0/loss=1.104, pursuer_1/loss=1.257, pursuer_2/loss=1.313, pursuer_3/loss=1.276, pursuer_4/loss=1.073, pursuer_5/loss=1.357, pursuer_6/loss=1.455, pursuer_7/loss=1.389, rew=514.31]                           


Epoch #137: test_reward: 92.386750 ± 161.091700, best_reward: 373.474437 ± 130.806359 in #134


Epoch #138: 20001it [00:57, 347.77it/s, env_step=2760000, len=3800, n/ep=0, n/st=400, pursuer_0/loss=1.106, pursuer_1/loss=1.244, pursuer_2/loss=1.349, pursuer_3/loss=1.313, pursuer_4/loss=1.112, pursuer_5/loss=1.293, pursuer_6/loss=1.372, pursuer_7/loss=1.404, rew=226.35]                           


Epoch #138: test_reward: 363.569312 ± 192.489912, best_reward: 373.474437 ± 130.806359 in #134


Epoch #139: 20001it [00:57, 350.56it/s, env_step=2780000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.074, pursuer_1/loss=1.225, pursuer_2/loss=1.381, pursuer_3/loss=1.279, pursuer_4/loss=1.217, pursuer_5/loss=1.294, pursuer_6/loss=1.273, pursuer_7/loss=1.355, rew=148.27]                           


Epoch #139: test_reward: 164.779875 ± 152.556904, best_reward: 373.474437 ± 130.806359 in #134


Epoch #140: 20001it [00:57, 350.01it/s, env_step=2800000, len=2432, n/ep=0, n/st=400, pursuer_0/loss=1.194, pursuer_1/loss=1.227, pursuer_2/loss=1.249, pursuer_3/loss=1.317, pursuer_4/loss=1.259, pursuer_5/loss=1.426, pursuer_6/loss=1.503, pursuer_7/loss=1.426, rew=411.16]                           


Epoch #140: test_reward: 258.871937 ± 164.543229, best_reward: 373.474437 ± 130.806359 in #134


Epoch #141: 20001it [00:56, 352.48it/s, env_step=2820000, len=2876, n/ep=2, n/st=400, pursuer_0/loss=1.321, pursuer_1/loss=1.220, pursuer_2/loss=1.409, pursuer_3/loss=1.377, pursuer_4/loss=1.274, pursuer_5/loss=1.338, pursuer_6/loss=1.389, pursuer_7/loss=1.455, rew=308.34]                           


Epoch #141: test_reward: 110.060687 ± 170.122480, best_reward: 373.474437 ± 130.806359 in #134


Epoch #142: 20001it [00:58, 340.91it/s, env_step=2840000, len=1376, n/ep=0, n/st=400, pursuer_0/loss=1.226, pursuer_1/loss=1.414, pursuer_2/loss=1.356, pursuer_3/loss=1.322, pursuer_4/loss=1.332, pursuer_5/loss=1.393, pursuer_6/loss=1.589, pursuer_7/loss=1.498, rew=452.24]                           


Epoch #142: test_reward: 72.369812 ± 113.069627, best_reward: 373.474437 ± 130.806359 in #134


Epoch #143: 20001it [00:55, 357.49it/s, env_step=2860000, len=3320, n/ep=0, n/st=400, pursuer_0/loss=1.336, pursuer_1/loss=1.291, pursuer_2/loss=1.303, pursuer_3/loss=1.286, pursuer_4/loss=1.277, pursuer_5/loss=1.336, pursuer_6/loss=1.509, pursuer_7/loss=1.521, rew=268.09]                           


Epoch #143: test_reward: 100.126687 ± 77.156720, best_reward: 373.474437 ± 130.806359 in #134


Epoch #144: 20001it [00:57, 348.28it/s, env_step=2880000, len=1144, n/ep=0, n/st=400, pursuer_0/loss=1.238, pursuer_1/loss=1.109, pursuer_2/loss=1.313, pursuer_3/loss=1.395, pursuer_4/loss=1.178, pursuer_5/loss=1.377, pursuer_6/loss=1.313, pursuer_7/loss=1.476, rew=469.85]                           


Epoch #144: test_reward: 300.326313 ± 144.285063, best_reward: 373.474437 ± 130.806359 in #134


Epoch #145: 20001it [00:56, 354.68it/s, env_step=2900000, len=1080, n/ep=0, n/st=400, pursuer_0/loss=1.272, pursuer_1/loss=1.185, pursuer_2/loss=1.276, pursuer_3/loss=1.321, pursuer_4/loss=1.337, pursuer_5/loss=1.295, pursuer_6/loss=1.443, pursuer_7/loss=1.473, rew=680.86]                           


Epoch #145: test_reward: 157.942062 ± 150.102556, best_reward: 373.474437 ± 130.806359 in #134


Epoch #146: 20001it [00:57, 346.17it/s, env_step=2920000, len=3208, n/ep=0, n/st=400, pursuer_0/loss=1.252, pursuer_1/loss=1.210, pursuer_2/loss=1.303, pursuer_3/loss=1.358, pursuer_4/loss=1.300, pursuer_5/loss=1.317, pursuer_6/loss=1.427, pursuer_7/loss=1.422, rew=297.92]                           


Epoch #146: test_reward: 258.759313 ± 173.134174, best_reward: 373.474437 ± 130.806359 in #134


Epoch #147: 20001it [00:56, 355.26it/s, env_step=2940000, len=1368, n/ep=0, n/st=400, pursuer_0/loss=1.180, pursuer_1/loss=1.245, pursuer_2/loss=1.285, pursuer_3/loss=1.404, pursuer_4/loss=1.351, pursuer_5/loss=1.198, pursuer_6/loss=1.252, pursuer_7/loss=1.398, rew=391.70]                           


Epoch #147: test_reward: 235.592125 ± 152.458230, best_reward: 373.474437 ± 130.806359 in #134


Epoch #148: 20001it [00:57, 347.84it/s, env_step=2960000, len=1704, n/ep=0, n/st=400, pursuer_0/loss=1.241, pursuer_1/loss=1.289, pursuer_2/loss=1.388, pursuer_3/loss=1.394, pursuer_4/loss=1.326, pursuer_5/loss=1.371, pursuer_6/loss=1.357, pursuer_7/loss=1.318, rew=411.85]                           


Epoch #148: test_reward: 270.907312 ± 156.401168, best_reward: 373.474437 ± 130.806359 in #134


Epoch #149: 20001it [00:57, 350.00it/s, env_step=2980000, len=2136, n/ep=0, n/st=400, pursuer_0/loss=1.200, pursuer_1/loss=1.223, pursuer_2/loss=1.173, pursuer_3/loss=1.260, pursuer_4/loss=1.164, pursuer_5/loss=1.288, pursuer_6/loss=1.408, pursuer_7/loss=1.276, rew=450.95]                           


Epoch #149: test_reward: 319.781812 ± 174.970495, best_reward: 373.474437 ± 130.806359 in #134


Epoch #150: 20001it [00:56, 353.69it/s, env_step=3000000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.149, pursuer_1/loss=1.177, pursuer_2/loss=1.327, pursuer_3/loss=1.232, pursuer_4/loss=1.265, pursuer_5/loss=1.343, pursuer_6/loss=1.259, pursuer_7/loss=1.362, rew=84.67]                            


Epoch #150: test_reward: 281.146750 ± 154.344995, best_reward: 373.474437 ± 130.806359 in #134


Epoch #151: 20001it [00:57, 346.60it/s, env_step=3020000, len=1672, n/ep=0, n/st=400, pursuer_0/loss=1.255, pursuer_1/loss=1.365, pursuer_2/loss=1.364, pursuer_3/loss=1.392, pursuer_4/loss=1.283, pursuer_5/loss=1.344, pursuer_6/loss=1.597, pursuer_7/loss=1.580, rew=571.48]                           


Epoch #151: test_reward: 287.275562 ± 178.852643, best_reward: 373.474437 ± 130.806359 in #134


Epoch #152: 20001it [00:58, 343.68it/s, env_step=3040000, len=1440, n/ep=0, n/st=400, pursuer_0/loss=1.454, pursuer_1/loss=1.346, pursuer_2/loss=1.373, pursuer_3/loss=1.399, pursuer_4/loss=1.341, pursuer_5/loss=1.471, pursuer_6/loss=1.547, pursuer_7/loss=1.560, rew=411.86]                           


Epoch #152: test_reward: 215.412938 ± 201.545442, best_reward: 373.474437 ± 130.806359 in #134


Epoch #153: 20001it [00:57, 348.90it/s, env_step=3060000, len=2288, n/ep=0, n/st=400, pursuer_0/loss=1.206, pursuer_1/loss=1.304, pursuer_2/loss=1.433, pursuer_3/loss=1.291, pursuer_4/loss=1.453, pursuer_5/loss=1.296, pursuer_6/loss=1.365, pursuer_7/loss=1.464, rew=596.01]                           


Epoch #153: test_reward: 370.449188 ± 157.478954, best_reward: 373.474437 ± 130.806359 in #134


Epoch #154: 20001it [00:57, 349.31it/s, env_step=3080000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.228, pursuer_1/loss=1.505, pursuer_2/loss=1.442, pursuer_3/loss=1.426, pursuer_4/loss=1.383, pursuer_5/loss=1.359, pursuer_6/loss=1.423, pursuer_7/loss=1.570, rew=164.56]                           


Epoch #154: test_reward: 319.325625 ± 163.150623, best_reward: 373.474437 ± 130.806359 in #134


Epoch #155: 20001it [00:57, 345.07it/s, env_step=3100000, len=2620, n/ep=0, n/st=400, pursuer_0/loss=1.164, pursuer_1/loss=1.206, pursuer_2/loss=1.228, pursuer_3/loss=1.182, pursuer_4/loss=1.251, pursuer_5/loss=1.270, pursuer_6/loss=1.267, pursuer_7/loss=1.338, rew=239.70]                           


Epoch #155: test_reward: 167.282562 ± 101.524167, best_reward: 373.474437 ± 130.806359 in #134


Epoch #156: 20001it [00:57, 350.88it/s, env_step=3120000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.144, pursuer_1/loss=1.149, pursuer_2/loss=1.285, pursuer_3/loss=1.345, pursuer_4/loss=1.221, pursuer_5/loss=1.198, pursuer_6/loss=1.325, pursuer_7/loss=1.306, rew=275.60]                           


Epoch #156: test_reward: 259.240500 ± 153.536162, best_reward: 373.474437 ± 130.806359 in #134


Epoch #157: 20001it [00:57, 346.61it/s, env_step=3140000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.189, pursuer_1/loss=1.134, pursuer_2/loss=1.090, pursuer_3/loss=1.242, pursuer_4/loss=1.292, pursuer_5/loss=1.139, pursuer_6/loss=1.307, pursuer_7/loss=1.219, rew=99.66]                            


Epoch #157: test_reward: 372.413562 ± 185.995877, best_reward: 373.474437 ± 130.806359 in #134


Epoch #158: 20001it [00:57, 347.03it/s, env_step=3160000, len=2984, n/ep=0, n/st=400, pursuer_0/loss=1.303, pursuer_1/loss=1.208, pursuer_2/loss=1.326, pursuer_3/loss=1.372, pursuer_4/loss=1.273, pursuer_5/loss=1.318, pursuer_6/loss=1.431, pursuer_7/loss=1.370, rew=457.64]                           


Epoch #158: test_reward: 286.728687 ± 211.726266, best_reward: 373.474437 ± 130.806359 in #134


Epoch #159: 20001it [00:57, 349.88it/s, env_step=3180000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.246, pursuer_1/loss=1.157, pursuer_2/loss=1.260, pursuer_3/loss=1.540, pursuer_4/loss=1.219, pursuer_5/loss=1.387, pursuer_6/loss=1.499, pursuer_7/loss=1.486, rew=149.24]                           


Epoch #159: test_reward: 238.869500 ± 144.330156, best_reward: 373.474437 ± 130.806359 in #134


Epoch #160: 20001it [00:57, 348.54it/s, env_step=3200000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.190, pursuer_1/loss=1.305, pursuer_2/loss=1.306, pursuer_3/loss=1.423, pursuer_4/loss=1.344, pursuer_5/loss=1.382, pursuer_6/loss=1.514, pursuer_7/loss=1.506, rew=90.97]                            


Epoch #160: test_reward: 263.312500 ± 168.033504, best_reward: 373.474437 ± 130.806359 in #134


Epoch #161: 20001it [00:57, 350.74it/s, env_step=3220000, len=2432, n/ep=0, n/st=400, pursuer_0/loss=1.120, pursuer_1/loss=1.176, pursuer_2/loss=1.448, pursuer_3/loss=1.411, pursuer_4/loss=1.378, pursuer_5/loss=1.229, pursuer_6/loss=1.286, pursuer_7/loss=1.317, rew=332.50]                           


Epoch #161: test_reward: 244.969375 ± 153.062706, best_reward: 373.474437 ± 130.806359 in #134


Epoch #162: 20001it [00:58, 343.90it/s, env_step=3240000, len=2400, n/ep=0, n/st=400, pursuer_0/loss=1.253, pursuer_1/loss=1.186, pursuer_2/loss=1.332, pursuer_3/loss=1.365, pursuer_4/loss=1.381, pursuer_5/loss=1.316, pursuer_6/loss=1.267, pursuer_7/loss=1.482, rew=498.86]                           


Epoch #162: test_reward: 311.217187 ± 169.153467, best_reward: 373.474437 ± 130.806359 in #134


Epoch #163: 20001it [00:57, 349.03it/s, env_step=3260000, len=3536, n/ep=0, n/st=400, pursuer_0/loss=1.227, pursuer_1/loss=1.092, pursuer_2/loss=1.275, pursuer_3/loss=1.449, pursuer_4/loss=1.204, pursuer_5/loss=1.244, pursuer_6/loss=1.291, pursuer_7/loss=1.453, rew=401.83]                           


Epoch #163: test_reward: 241.223937 ± 148.843611, best_reward: 373.474437 ± 130.806359 in #134


Epoch #164: 20001it [00:56, 354.08it/s, env_step=3280000, len=3440, n/ep=0, n/st=400, pursuer_0/loss=1.221, pursuer_1/loss=1.239, pursuer_2/loss=1.331, pursuer_3/loss=1.437, pursuer_4/loss=1.303, pursuer_5/loss=1.247, pursuer_6/loss=1.316, pursuer_7/loss=1.440, rew=370.01]                           


Epoch #164: test_reward: 163.458187 ± 155.246717, best_reward: 373.474437 ± 130.806359 in #134


Epoch #165: 20001it [00:58, 344.73it/s, env_step=3300000, len=1256, n/ep=0, n/st=400, pursuer_0/loss=1.021, pursuer_1/loss=1.160, pursuer_2/loss=1.164, pursuer_3/loss=1.341, pursuer_4/loss=1.239, pursuer_5/loss=1.332, pursuer_6/loss=1.254, pursuer_7/loss=1.271, rew=564.17]                           


Epoch #165: test_reward: 318.533437 ± 187.314056, best_reward: 373.474437 ± 130.806359 in #134


Epoch #166: 20001it [00:58, 342.50it/s, env_step=3320000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.186, pursuer_1/loss=1.158, pursuer_2/loss=1.145, pursuer_3/loss=1.383, pursuer_4/loss=1.331, pursuer_5/loss=1.180, pursuer_6/loss=1.322, pursuer_7/loss=1.422, rew=170.81]                           


Epoch #166: test_reward: 290.467875 ± 130.287412, best_reward: 373.474437 ± 130.806359 in #134


Epoch #167: 20001it [00:57, 347.81it/s, env_step=3340000, len=1024, n/ep=1, n/st=400, pursuer_0/loss=1.128, pursuer_1/loss=1.195, pursuer_2/loss=1.261, pursuer_3/loss=1.440, pursuer_4/loss=1.202, pursuer_5/loss=1.378, pursuer_6/loss=1.321, pursuer_7/loss=1.379, rew=561.13]                           


Epoch #167: test_reward: 270.773250 ± 201.429721, best_reward: 373.474437 ± 130.806359 in #134


Epoch #168: 20001it [00:57, 349.85it/s, env_step=3360000, len=2360, n/ep=0, n/st=400, pursuer_0/loss=1.216, pursuer_1/loss=1.245, pursuer_2/loss=1.224, pursuer_3/loss=1.247, pursuer_4/loss=1.251, pursuer_5/loss=1.292, pursuer_6/loss=1.283, pursuer_7/loss=1.559, rew=412.84]                           


Epoch #168: test_reward: 307.920188 ± 163.835498, best_reward: 373.474437 ± 130.806359 in #134


Epoch #169: 20001it [00:57, 345.66it/s, env_step=3380000, len=1800, n/ep=0, n/st=400, pursuer_0/loss=1.003, pursuer_1/loss=1.219, pursuer_2/loss=1.289, pursuer_3/loss=1.367, pursuer_4/loss=1.277, pursuer_5/loss=1.325, pursuer_6/loss=1.408, pursuer_7/loss=1.471, rew=714.13]                           


Epoch #169: test_reward: 311.275375 ± 171.254514, best_reward: 373.474437 ± 130.806359 in #134


Epoch #170: 20001it [00:57, 350.78it/s, env_step=3400000, len=1440, n/ep=1, n/st=400, pursuer_0/loss=1.093, pursuer_1/loss=1.167, pursuer_2/loss=1.379, pursuer_3/loss=1.076, pursuer_4/loss=1.218, pursuer_5/loss=1.294, pursuer_6/loss=1.265, pursuer_7/loss=1.408, rew=586.27]                           


Epoch #170: test_reward: 193.396188 ± 130.331970, best_reward: 373.474437 ± 130.806359 in #134


Epoch #171: 20001it [00:58, 342.79it/s, env_step=3420000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.258, pursuer_1/loss=1.238, pursuer_2/loss=1.438, pursuer_3/loss=1.383, pursuer_4/loss=1.335, pursuer_5/loss=1.483, pursuer_6/loss=1.518, pursuer_7/loss=1.456, rew=302.92]                           


Epoch #171: test_reward: 202.527687 ± 147.124688, best_reward: 373.474437 ± 130.806359 in #134


Epoch #172: 20001it [00:56, 352.28it/s, env_step=3440000, len=1824, n/ep=1, n/st=400, pursuer_0/loss=1.269, pursuer_1/loss=1.346, pursuer_2/loss=1.363, pursuer_3/loss=1.345, pursuer_4/loss=1.293, pursuer_5/loss=1.551, pursuer_6/loss=1.339, pursuer_7/loss=1.440, rew=306.63]                           


Epoch #172: test_reward: 176.535562 ± 111.322331, best_reward: 373.474437 ± 130.806359 in #134


Epoch #173: 20001it [00:58, 342.82it/s, env_step=3460000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.319, pursuer_1/loss=1.400, pursuer_2/loss=1.385, pursuer_3/loss=1.422, pursuer_4/loss=1.454, pursuer_5/loss=1.358, pursuer_6/loss=1.383, pursuer_7/loss=1.460, rew=34.33]                            


Epoch #173: test_reward: 277.194750 ± 181.203153, best_reward: 373.474437 ± 130.806359 in #134


Epoch #174: 20001it [00:58, 344.24it/s, env_step=3480000, len=2816, n/ep=1, n/st=400, pursuer_0/loss=1.238, pursuer_1/loss=1.307, pursuer_2/loss=1.282, pursuer_3/loss=1.298, pursuer_4/loss=1.272, pursuer_5/loss=1.420, pursuer_6/loss=1.452, pursuer_7/loss=1.434, rew=353.45]                           


Epoch #174: test_reward: 335.929938 ± 164.900918, best_reward: 373.474437 ± 130.806359 in #134


Epoch #175: 20001it [00:57, 350.72it/s, env_step=3500000, len=2296, n/ep=0, n/st=400, pursuer_0/loss=1.234, pursuer_1/loss=1.269, pursuer_2/loss=1.376, pursuer_3/loss=1.439, pursuer_4/loss=1.380, pursuer_5/loss=1.526, pursuer_6/loss=1.514, pursuer_7/loss=1.459, rew=396.77]                           


Epoch #175: test_reward: 210.439250 ± 140.393296, best_reward: 373.474437 ± 130.806359 in #134


Epoch #176: 20001it [00:56, 355.01it/s, env_step=3520000, len=1152, n/ep=1, n/st=400, pursuer_0/loss=1.143, pursuer_1/loss=1.318, pursuer_2/loss=1.465, pursuer_3/loss=1.364, pursuer_4/loss=1.213, pursuer_5/loss=1.430, pursuer_6/loss=1.543, pursuer_7/loss=1.439, rew=388.35]                           


Epoch #176: test_reward: 230.570875 ± 182.063778, best_reward: 373.474437 ± 130.806359 in #134


Epoch #177: 20001it [00:56, 352.67it/s, env_step=3540000, len=1832, n/ep=0, n/st=400, pursuer_0/loss=1.282, pursuer_1/loss=1.274, pursuer_2/loss=1.489, pursuer_3/loss=1.394, pursuer_4/loss=1.407, pursuer_5/loss=1.476, pursuer_6/loss=1.640, pursuer_7/loss=1.613, rew=420.41]                           


Epoch #177: test_reward: 244.915250 ± 167.397443, best_reward: 373.474437 ± 130.806359 in #134


Epoch #178: 20001it [00:56, 353.00it/s, env_step=3560000, len=1912, n/ep=0, n/st=400, pursuer_0/loss=1.220, pursuer_1/loss=1.237, pursuer_2/loss=1.259, pursuer_3/loss=1.316, pursuer_4/loss=1.283, pursuer_5/loss=1.343, pursuer_6/loss=1.529, pursuer_7/loss=1.578, rew=509.34]                           


Epoch #178: test_reward: 240.373250 ± 129.529158, best_reward: 373.474437 ± 130.806359 in #134


Epoch #179: 20001it [00:57, 347.79it/s, env_step=3580000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.163, pursuer_1/loss=1.296, pursuer_2/loss=1.522, pursuer_3/loss=1.385, pursuer_4/loss=1.423, pursuer_5/loss=1.363, pursuer_6/loss=1.558, pursuer_7/loss=1.508, rew=171.74]                           


Epoch #179: test_reward: 146.765312 ± 123.422359, best_reward: 373.474437 ± 130.806359 in #134


Epoch #180: 20001it [00:57, 346.09it/s, env_step=3600000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.152, pursuer_1/loss=1.228, pursuer_2/loss=1.316, pursuer_3/loss=1.243, pursuer_4/loss=1.260, pursuer_5/loss=1.439, pursuer_6/loss=1.494, pursuer_7/loss=1.517, rew=159.82]                           


Epoch #180: test_reward: 220.399250 ± 149.872686, best_reward: 373.474437 ± 130.806359 in #134


Epoch #181: 20001it [00:56, 351.47it/s, env_step=3620000, len=2264, n/ep=0, n/st=400, pursuer_0/loss=1.277, pursuer_1/loss=1.388, pursuer_2/loss=1.313, pursuer_3/loss=1.226, pursuer_4/loss=1.214, pursuer_5/loss=1.384, pursuer_6/loss=1.573, pursuer_7/loss=1.393, rew=458.68]                           


Best Saved
Epoch #181: test_reward: 390.347813 ± 141.461095, best_reward: 390.347813 ± 141.461095 in #181


Epoch #182: 20001it [00:57, 348.21it/s, env_step=3640000, len=3500, n/ep=0, n/st=400, pursuer_0/loss=1.202, pursuer_1/loss=1.223, pursuer_2/loss=1.474, pursuer_3/loss=1.429, pursuer_4/loss=1.214, pursuer_5/loss=1.224, pursuer_6/loss=1.419, pursuer_7/loss=1.466, rew=250.59]                           


Epoch #182: test_reward: 227.522250 ± 148.245643, best_reward: 390.347813 ± 141.461095 in #181


Epoch #183: 20001it [00:57, 344.89it/s, env_step=3660000, len=1144, n/ep=0, n/st=400, pursuer_0/loss=1.238, pursuer_1/loss=1.168, pursuer_2/loss=1.341, pursuer_3/loss=1.373, pursuer_4/loss=1.213, pursuer_5/loss=1.390, pursuer_6/loss=1.524, pursuer_7/loss=1.567, rew=485.56]                           


Epoch #183: test_reward: 364.975750 ± 202.848246, best_reward: 390.347813 ± 141.461095 in #181


Epoch #184: 20001it [00:56, 351.72it/s, env_step=3680000, len=3112, n/ep=0, n/st=400, pursuer_0/loss=1.151, pursuer_1/loss=1.242, pursuer_2/loss=1.217, pursuer_3/loss=1.354, pursuer_4/loss=1.198, pursuer_5/loss=1.295, pursuer_6/loss=1.302, pursuer_7/loss=1.339, rew=304.70]                           


Epoch #184: test_reward: 265.822063 ± 184.143667, best_reward: 390.347813 ± 141.461095 in #181


Epoch #185: 20001it [00:56, 353.88it/s, env_step=3700000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.102, pursuer_1/loss=1.151, pursuer_2/loss=1.229, pursuer_3/loss=1.235, pursuer_4/loss=1.140, pursuer_5/loss=1.237, pursuer_6/loss=1.230, pursuer_7/loss=1.310, rew=299.71]                           


Epoch #185: test_reward: 302.058875 ± 162.888484, best_reward: 390.347813 ± 141.461095 in #181


Epoch #186: 20001it [00:57, 350.48it/s, env_step=3720000, len=1640, n/ep=0, n/st=400, pursuer_0/loss=1.236, pursuer_1/loss=1.164, pursuer_2/loss=1.351, pursuer_3/loss=1.182, pursuer_4/loss=1.314, pursuer_5/loss=1.146, pursuer_6/loss=1.342, pursuer_7/loss=1.479, rew=375.91]                           


Epoch #186: test_reward: 237.831437 ± 124.090685, best_reward: 390.347813 ± 141.461095 in #181


Epoch #187: 20001it [00:56, 352.04it/s, env_step=3740000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.147, pursuer_1/loss=1.188, pursuer_2/loss=1.302, pursuer_3/loss=1.307, pursuer_4/loss=1.278, pursuer_5/loss=1.212, pursuer_6/loss=1.118, pursuer_7/loss=1.423, rew=87.13]                            


Epoch #187: test_reward: 156.307062 ± 97.608802, best_reward: 390.347813 ± 141.461095 in #181


Epoch #188: 20001it [00:55, 357.67it/s, env_step=3760000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.040, pursuer_1/loss=1.120, pursuer_2/loss=1.126, pursuer_3/loss=1.275, pursuer_4/loss=1.095, pursuer_5/loss=1.121, pursuer_6/loss=1.243, pursuer_7/loss=1.206, rew=240.20]                           


Epoch #188: test_reward: 292.121312 ± 120.011311, best_reward: 390.347813 ± 141.461095 in #181


Epoch #189: 20001it [00:58, 343.41it/s, env_step=3780000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.050, pursuer_1/loss=1.177, pursuer_2/loss=1.228, pursuer_3/loss=1.256, pursuer_4/loss=1.222, pursuer_5/loss=1.132, pursuer_6/loss=1.267, pursuer_7/loss=1.373, rew=199.19]                           


Epoch #189: test_reward: 344.825000 ± 127.068000, best_reward: 390.347813 ± 141.461095 in #181


Epoch #190: 20001it [00:56, 353.30it/s, env_step=3800000, len=1192, n/ep=0, n/st=400, pursuer_0/loss=1.092, pursuer_1/loss=1.224, pursuer_2/loss=1.202, pursuer_3/loss=1.327, pursuer_4/loss=1.112, pursuer_5/loss=1.120, pursuer_6/loss=1.180, pursuer_7/loss=1.344, rew=571.62]                           


Epoch #190: test_reward: 236.160312 ± 136.636843, best_reward: 390.347813 ± 141.461095 in #181


Epoch #191: 20001it [00:57, 350.26it/s, env_step=3820000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.137, pursuer_1/loss=1.161, pursuer_2/loss=1.113, pursuer_3/loss=1.374, pursuer_4/loss=1.253, pursuer_5/loss=1.214, pursuer_6/loss=1.332, pursuer_7/loss=1.362, rew=88.40]                            


Best Saved
Epoch #191: test_reward: 438.591625 ± 177.855992, best_reward: 438.591625 ± 177.855992 in #191


Epoch #192: 20001it [00:56, 355.90it/s, env_step=3840000, len=1664, n/ep=0, n/st=400, pursuer_0/loss=1.038, pursuer_1/loss=1.053, pursuer_2/loss=1.197, pursuer_3/loss=1.264, pursuer_4/loss=1.160, pursuer_5/loss=1.005, pursuer_6/loss=1.130, pursuer_7/loss=1.242, rew=437.95]                           


Epoch #192: test_reward: 357.726000 ± 155.798998, best_reward: 438.591625 ± 177.855992 in #191


Epoch #193: 20001it [00:56, 355.87it/s, env_step=3860000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.014, pursuer_1/loss=1.077, pursuer_2/loss=1.193, pursuer_3/loss=1.261, pursuer_4/loss=1.170, pursuer_5/loss=1.095, pursuer_6/loss=1.209, pursuer_7/loss=1.282, rew=200.87]                           


Epoch #193: test_reward: 266.573562 ± 153.153572, best_reward: 438.591625 ± 177.855992 in #191


Epoch #194: 20001it [00:56, 352.67it/s, env_step=3880000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.226, pursuer_1/loss=1.233, pursuer_2/loss=1.124, pursuer_3/loss=1.198, pursuer_4/loss=1.140, pursuer_5/loss=1.295, pursuer_6/loss=1.082, pursuer_7/loss=1.278, rew=266.47]                           


Epoch #194: test_reward: 169.674000 ± 140.725745, best_reward: 438.591625 ± 177.855992 in #191


Epoch #195: 20001it [00:57, 350.12it/s, env_step=3900000, len=3600, n/ep=0, n/st=400, pursuer_0/loss=1.123, pursuer_1/loss=1.148, pursuer_2/loss=1.213, pursuer_3/loss=1.186, pursuer_4/loss=1.204, pursuer_5/loss=1.102, pursuer_6/loss=1.220, pursuer_7/loss=1.403, rew=160.58]                           


Epoch #195: test_reward: 420.723312 ± 152.622652, best_reward: 438.591625 ± 177.855992 in #191


Epoch #196: 20001it [00:57, 348.21it/s, env_step=3920000, len=1312, n/ep=0, n/st=400, pursuer_0/loss=1.096, pursuer_1/loss=1.174, pursuer_2/loss=1.254, pursuer_3/loss=1.253, pursuer_4/loss=1.193, pursuer_5/loss=1.131, pursuer_6/loss=1.368, pursuer_7/loss=1.330, rew=449.15]                           


Epoch #196: test_reward: 265.486875 ± 198.517130, best_reward: 438.591625 ± 177.855992 in #191


Epoch #197: 20001it [00:57, 346.41it/s, env_step=3940000, len=1864, n/ep=0, n/st=400, pursuer_0/loss=1.167, pursuer_1/loss=1.221, pursuer_2/loss=1.221, pursuer_3/loss=1.368, pursuer_4/loss=1.170, pursuer_5/loss=1.198, pursuer_6/loss=1.246, pursuer_7/loss=1.274, rew=539.54]                           


Epoch #197: test_reward: 309.292687 ± 163.729669, best_reward: 438.591625 ± 177.855992 in #191


Epoch #198: 20001it [00:57, 347.22it/s, env_step=3960000, len=1312, n/ep=0, n/st=400, pursuer_0/loss=1.130, pursuer_1/loss=1.145, pursuer_2/loss=1.285, pursuer_3/loss=1.241, pursuer_4/loss=1.283, pursuer_5/loss=1.236, pursuer_6/loss=1.277, pursuer_7/loss=1.331, rew=433.18]                           


Epoch #198: test_reward: 321.896250 ± 154.742684, best_reward: 438.591625 ± 177.855992 in #191


Epoch #199: 20001it [00:57, 347.85it/s, env_step=3980000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.322, pursuer_1/loss=1.288, pursuer_2/loss=1.328, pursuer_3/loss=1.345, pursuer_4/loss=1.373, pursuer_5/loss=1.331, pursuer_6/loss=1.525, pursuer_7/loss=1.420, rew=351.77]                           


Epoch #199: test_reward: 311.458625 ± 148.568811, best_reward: 438.591625 ± 177.855992 in #191


Epoch #200: 20001it [00:57, 350.14it/s, env_step=4000000, len=1264, n/ep=0, n/st=400, pursuer_0/loss=1.169, pursuer_1/loss=1.331, pursuer_2/loss=1.537, pursuer_3/loss=1.381, pursuer_4/loss=1.255, pursuer_5/loss=1.284, pursuer_6/loss=1.519, pursuer_7/loss=1.442, rew=437.71]                           


Epoch #200: test_reward: 139.885437 ± 144.793582, best_reward: 438.591625 ± 177.855992 in #191


Epoch #201: 20001it [00:56, 352.25it/s, env_step=4020000, len=1136, n/ep=0, n/st=400, pursuer_0/loss=1.145, pursuer_1/loss=1.219, pursuer_2/loss=1.386, pursuer_3/loss=1.288, pursuer_4/loss=1.295, pursuer_5/loss=1.380, pursuer_6/loss=1.395, pursuer_7/loss=1.394, rew=530.88]                           


Epoch #201: test_reward: 247.673375 ± 141.583711, best_reward: 438.591625 ± 177.855992 in #191


Epoch #202: 20001it [00:56, 353.35it/s, env_step=4040000, len=1448, n/ep=0, n/st=400, pursuer_0/loss=1.222, pursuer_1/loss=1.276, pursuer_2/loss=1.489, pursuer_3/loss=1.426, pursuer_4/loss=1.429, pursuer_5/loss=1.428, pursuer_6/loss=1.437, pursuer_7/loss=1.487, rew=506.48]                           


Epoch #202: test_reward: 235.121062 ± 118.986889, best_reward: 438.591625 ± 177.855992 in #191


Epoch #203: 20001it [00:58, 344.47it/s, env_step=4060000, len=1528, n/ep=0, n/st=400, pursuer_0/loss=1.371, pursuer_1/loss=1.560, pursuer_2/loss=1.448, pursuer_3/loss=1.570, pursuer_4/loss=1.529, pursuer_5/loss=1.603, pursuer_6/loss=1.673, pursuer_7/loss=1.704, rew=333.17]                           


Epoch #203: test_reward: 419.718375 ± 169.890700, best_reward: 438.591625 ± 177.855992 in #191


Epoch #204: 20001it [00:57, 349.80it/s, env_step=4080000, len=3328, n/ep=0, n/st=400, pursuer_0/loss=1.395, pursuer_1/loss=1.360, pursuer_2/loss=1.412, pursuer_3/loss=1.515, pursuer_4/loss=1.395, pursuer_5/loss=1.433, pursuer_6/loss=1.454, pursuer_7/loss=1.510, rew=146.82]                           


Epoch #204: test_reward: 244.774000 ± 116.435366, best_reward: 438.591625 ± 177.855992 in #191


Epoch #205: 20001it [00:56, 351.36it/s, env_step=4100000, len=3584, n/ep=0, n/st=400, pursuer_0/loss=1.289, pursuer_1/loss=1.321, pursuer_2/loss=1.423, pursuer_3/loss=1.465, pursuer_4/loss=1.248, pursuer_5/loss=1.435, pursuer_6/loss=1.478, pursuer_7/loss=1.396, rew=162.54]                           


Epoch #205: test_reward: 407.440250 ± 157.921528, best_reward: 438.591625 ± 177.855992 in #191


Epoch #206: 20001it [00:56, 356.21it/s, env_step=4120000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.311, pursuer_1/loss=1.417, pursuer_2/loss=1.564, pursuer_3/loss=1.701, pursuer_4/loss=1.374, pursuer_5/loss=1.519, pursuer_6/loss=1.752, pursuer_7/loss=1.673, rew=183.49]                           


Epoch #206: test_reward: 212.838687 ± 132.311325, best_reward: 438.591625 ± 177.855992 in #191


Epoch #207: 20001it [00:57, 346.51it/s, env_step=4140000, len=1216, n/ep=0, n/st=400, pursuer_0/loss=1.402, pursuer_1/loss=1.441, pursuer_2/loss=1.620, pursuer_3/loss=1.740, pursuer_4/loss=1.318, pursuer_5/loss=1.632, pursuer_6/loss=1.704, pursuer_7/loss=1.712, rew=587.52]                           


Epoch #207: test_reward: 358.344125 ± 97.618864, best_reward: 438.591625 ± 177.855992 in #191


Epoch #208: 20001it [00:56, 352.63it/s, env_step=4160000, len=3848, n/ep=1, n/st=400, pursuer_0/loss=1.488, pursuer_1/loss=1.444, pursuer_2/loss=1.431, pursuer_3/loss=1.531, pursuer_4/loss=1.570, pursuer_5/loss=1.588, pursuer_6/loss=1.567, pursuer_7/loss=1.545, rew=193.15]                           


Epoch #208: test_reward: 329.161438 ± 167.717690, best_reward: 438.591625 ± 177.855992 in #191


Epoch #209: 20001it [00:57, 348.11it/s, env_step=4180000, len=2368, n/ep=0, n/st=400, pursuer_0/loss=1.255, pursuer_1/loss=1.485, pursuer_2/loss=1.649, pursuer_3/loss=1.439, pursuer_4/loss=1.436, pursuer_5/loss=1.764, pursuer_6/loss=1.551, pursuer_7/loss=1.604, rew=330.33]                           


Epoch #209: test_reward: 298.142875 ± 133.930363, best_reward: 438.591625 ± 177.855992 in #191


Epoch #210: 20001it [00:56, 351.83it/s, env_step=4200000, len=1520, n/ep=0, n/st=400, pursuer_0/loss=1.298, pursuer_1/loss=1.507, pursuer_2/loss=1.491, pursuer_3/loss=1.560, pursuer_4/loss=1.458, pursuer_5/loss=1.666, pursuer_6/loss=1.642, pursuer_7/loss=1.531, rew=443.31]                           


Epoch #210: test_reward: 338.572937 ± 117.124136, best_reward: 438.591625 ± 177.855992 in #191


Epoch #211: 20001it [00:56, 354.13it/s, env_step=4220000, len=2696, n/ep=0, n/st=400, pursuer_0/loss=1.434, pursuer_1/loss=1.373, pursuer_2/loss=1.468, pursuer_3/loss=1.466, pursuer_4/loss=1.390, pursuer_5/loss=1.435, pursuer_6/loss=1.761, pursuer_7/loss=1.457, rew=280.60]                           


Epoch #211: test_reward: 393.305750 ± 94.139892, best_reward: 438.591625 ± 177.855992 in #191


Epoch #212: 20001it [00:58, 344.80it/s, env_step=4240000, len=1640, n/ep=0, n/st=400, pursuer_0/loss=1.516, pursuer_1/loss=1.478, pursuer_2/loss=1.631, pursuer_3/loss=1.476, pursuer_4/loss=1.424, pursuer_5/loss=1.585, pursuer_6/loss=1.565, pursuer_7/loss=1.542, rew=446.09]                           


Epoch #212: test_reward: 343.963937 ± 158.769168, best_reward: 438.591625 ± 177.855992 in #191


Epoch #213: 20001it [00:56, 355.00it/s, env_step=4260000, len=3016, n/ep=0, n/st=400, pursuer_0/loss=1.338, pursuer_1/loss=1.247, pursuer_2/loss=1.353, pursuer_3/loss=1.458, pursuer_4/loss=1.360, pursuer_5/loss=1.433, pursuer_6/loss=1.304, pursuer_7/loss=1.417, rew=279.51]                           


Epoch #213: test_reward: 252.511437 ± 144.053263, best_reward: 438.591625 ± 177.855992 in #191


Epoch #214: 20001it [00:56, 356.54it/s, env_step=4280000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.277, pursuer_1/loss=1.201, pursuer_2/loss=1.257, pursuer_3/loss=1.402, pursuer_4/loss=1.286, pursuer_5/loss=1.593, pursuer_6/loss=1.354, pursuer_7/loss=1.406, rew=204.11]                           


Epoch #214: test_reward: 181.355375 ± 117.159104, best_reward: 438.591625 ± 177.855992 in #191


Epoch #215: 20001it [00:57, 349.95it/s, env_step=4300000, len=1616, n/ep=0, n/st=400, pursuer_0/loss=1.372, pursuer_1/loss=1.290, pursuer_2/loss=1.255, pursuer_3/loss=1.399, pursuer_4/loss=1.327, pursuer_5/loss=1.597, pursuer_6/loss=1.401, pursuer_7/loss=1.580, rew=412.84]                           


Epoch #215: test_reward: 315.170875 ± 190.911666, best_reward: 438.591625 ± 177.855992 in #191


Epoch #216: 20001it [00:57, 349.18it/s, env_step=4320000, len=1736, n/ep=0, n/st=400, pursuer_0/loss=1.241, pursuer_1/loss=1.268, pursuer_2/loss=1.336, pursuer_3/loss=1.402, pursuer_4/loss=1.335, pursuer_5/loss=1.419, pursuer_6/loss=1.515, pursuer_7/loss=1.530, rew=376.47]                           


Epoch #216: test_reward: 328.887875 ± 159.112809, best_reward: 438.591625 ± 177.855992 in #191


Epoch #217: 20001it [00:55, 358.69it/s, env_step=4340000, len=2280, n/ep=0, n/st=400, pursuer_0/loss=1.184, pursuer_1/loss=1.082, pursuer_2/loss=1.071, pursuer_3/loss=1.386, pursuer_4/loss=1.106, pursuer_5/loss=1.260, pursuer_6/loss=1.258, pursuer_7/loss=1.394, rew=382.45]                           


Epoch #217: test_reward: 239.525812 ± 176.362255, best_reward: 438.591625 ± 177.855992 in #191


Epoch #218: 20001it [00:57, 346.78it/s, env_step=4360000, len=1264, n/ep=0, n/st=400, pursuer_0/loss=1.268, pursuer_1/loss=1.203, pursuer_2/loss=1.442, pursuer_3/loss=1.323, pursuer_4/loss=1.246, pursuer_5/loss=1.527, pursuer_6/loss=1.359, pursuer_7/loss=1.230, rew=482.86]                           


Epoch #218: test_reward: 362.852937 ± 210.248356, best_reward: 438.591625 ± 177.855992 in #191


Epoch #219: 20001it [00:57, 348.68it/s, env_step=4380000, len=1912, n/ep=2, n/st=400, pursuer_0/loss=1.141, pursuer_1/loss=1.148, pursuer_2/loss=1.134, pursuer_3/loss=1.233, pursuer_4/loss=1.174, pursuer_5/loss=1.255, pursuer_6/loss=1.247, pursuer_7/loss=1.382, rew=267.06]                           


Epoch #219: test_reward: 392.903187 ± 173.584417, best_reward: 438.591625 ± 177.855992 in #191


Epoch #220: 20001it [00:58, 344.45it/s, env_step=4400000, len=1816, n/ep=0, n/st=400, pursuer_0/loss=1.256, pursuer_1/loss=1.110, pursuer_2/loss=1.264, pursuer_3/loss=1.140, pursuer_4/loss=1.266, pursuer_5/loss=1.388, pursuer_6/loss=1.304, pursuer_7/loss=1.430, rew=493.64]                           


Epoch #220: test_reward: 331.124500 ± 166.041673, best_reward: 438.591625 ± 177.855992 in #191


Epoch #221: 20001it [00:56, 357.12it/s, env_step=4420000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.346, pursuer_1/loss=1.295, pursuer_2/loss=1.080, pursuer_3/loss=1.326, pursuer_4/loss=1.401, pursuer_5/loss=1.314, pursuer_6/loss=1.516, pursuer_7/loss=1.331, rew=95.79]                            


Epoch #221: test_reward: 242.381937 ± 115.212741, best_reward: 438.591625 ± 177.855992 in #191


Epoch #222: 20001it [00:57, 349.63it/s, env_step=4440000, len=1472, n/ep=0, n/st=400, pursuer_0/loss=1.270, pursuer_1/loss=1.290, pursuer_2/loss=1.368, pursuer_3/loss=1.410, pursuer_4/loss=1.383, pursuer_5/loss=1.528, pursuer_6/loss=1.299, pursuer_7/loss=1.498, rew=417.88]                           


Epoch #222: test_reward: 331.333375 ± 148.156148, best_reward: 438.591625 ± 177.855992 in #191


Epoch #223: 20001it [00:57, 349.05it/s, env_step=4460000, len=1248, n/ep=0, n/st=400, pursuer_0/loss=1.282, pursuer_1/loss=1.296, pursuer_2/loss=1.274, pursuer_3/loss=1.337, pursuer_4/loss=1.370, pursuer_5/loss=1.245, pursuer_6/loss=1.525, pursuer_7/loss=1.438, rew=379.69]                           


Epoch #223: test_reward: 160.512062 ± 124.884462, best_reward: 438.591625 ± 177.855992 in #191


Epoch #224: 20001it [00:56, 352.53it/s, env_step=4480000, len=1376, n/ep=1, n/st=400, pursuer_0/loss=1.248, pursuer_1/loss=1.200, pursuer_2/loss=1.236, pursuer_3/loss=1.393, pursuer_4/loss=1.288, pursuer_5/loss=1.298, pursuer_6/loss=1.492, pursuer_7/loss=1.510, rew=637.20]                           


Epoch #224: test_reward: 357.108875 ± 152.566919, best_reward: 438.591625 ± 177.855992 in #191


Epoch #225: 20001it [00:57, 344.92it/s, env_step=4500000, len=1816, n/ep=0, n/st=400, pursuer_0/loss=1.381, pursuer_1/loss=1.310, pursuer_2/loss=1.361, pursuer_3/loss=1.534, pursuer_4/loss=1.530, pursuer_5/loss=1.512, pursuer_6/loss=1.536, pursuer_7/loss=1.599, rew=420.00]                           


Epoch #225: test_reward: 166.299125 ± 173.437694, best_reward: 438.591625 ± 177.855992 in #191


Epoch #226: 20001it [00:57, 348.99it/s, env_step=4520000, len=816, n/ep=1, n/st=400, pursuer_0/loss=1.253, pursuer_1/loss=1.475, pursuer_2/loss=1.389, pursuer_3/loss=1.403, pursuer_4/loss=1.417, pursuer_5/loss=1.397, pursuer_6/loss=1.523, pursuer_7/loss=1.705, rew=572.35]                            


Epoch #226: test_reward: 224.597437 ± 184.855300, best_reward: 438.591625 ± 177.855992 in #191


Epoch #227: 20001it [00:57, 345.67it/s, env_step=4540000, len=848, n/ep=0, n/st=400, pursuer_0/loss=1.432, pursuer_1/loss=1.555, pursuer_2/loss=1.416, pursuer_3/loss=1.643, pursuer_4/loss=1.471, pursuer_5/loss=1.561, pursuer_6/loss=1.537, pursuer_7/loss=1.524, rew=668.38]                            


Epoch #227: test_reward: 245.002312 ± 192.630292, best_reward: 438.591625 ± 177.855992 in #191


Epoch #228: 20001it [00:58, 341.74it/s, env_step=4560000, len=1488, n/ep=0, n/st=400, pursuer_0/loss=1.399, pursuer_1/loss=1.421, pursuer_2/loss=1.365, pursuer_3/loss=1.430, pursuer_4/loss=1.536, pursuer_5/loss=1.559, pursuer_6/loss=1.577, pursuer_7/loss=1.618, rew=370.59]                           


Epoch #228: test_reward: 260.275562 ± 191.285454, best_reward: 438.591625 ± 177.855992 in #191


Epoch #229: 20001it [00:58, 344.04it/s, env_step=4580000, len=976, n/ep=0, n/st=400, pursuer_0/loss=1.315, pursuer_1/loss=1.598, pursuer_2/loss=1.627, pursuer_3/loss=1.674, pursuer_4/loss=1.523, pursuer_5/loss=1.554, pursuer_6/loss=1.610, pursuer_7/loss=1.839, rew=731.43]                            


Epoch #229: test_reward: 293.200375 ± 157.914358, best_reward: 438.591625 ± 177.855992 in #191


Epoch #230: 20001it [00:57, 347.50it/s, env_step=4600000, len=2424, n/ep=0, n/st=400, pursuer_0/loss=1.337, pursuer_1/loss=1.505, pursuer_2/loss=1.553, pursuer_3/loss=1.454, pursuer_4/loss=1.498, pursuer_5/loss=1.541, pursuer_6/loss=1.536, pursuer_7/loss=1.770, rew=292.73]                           


Epoch #230: test_reward: 396.557062 ± 181.841473, best_reward: 438.591625 ± 177.855992 in #191


Epoch #231: 20001it [00:57, 350.43it/s, env_step=4620000, len=2136, n/ep=0, n/st=400, pursuer_0/loss=1.397, pursuer_1/loss=1.562, pursuer_2/loss=1.480, pursuer_3/loss=1.645, pursuer_4/loss=1.463, pursuer_5/loss=1.581, pursuer_6/loss=1.555, pursuer_7/loss=1.818, rew=311.81]                           


Epoch #231: test_reward: 281.560187 ± 153.869964, best_reward: 438.591625 ± 177.855992 in #191


Epoch #232: 20001it [00:58, 342.65it/s, env_step=4640000, len=2056, n/ep=1, n/st=400, pursuer_0/loss=1.465, pursuer_1/loss=1.358, pursuer_2/loss=1.526, pursuer_3/loss=1.665, pursuer_4/loss=1.526, pursuer_5/loss=1.660, pursuer_6/loss=1.537, pursuer_7/loss=1.559, rew=589.58]                           


Best Saved
Epoch #232: test_reward: 439.528750 ± 128.345187, best_reward: 439.528750 ± 128.345187 in #232


Epoch #233: 20001it [00:57, 347.26it/s, env_step=4660000, len=1944, n/ep=0, n/st=400, pursuer_0/loss=1.476, pursuer_1/loss=1.568, pursuer_2/loss=1.520, pursuer_3/loss=1.647, pursuer_4/loss=1.655, pursuer_5/loss=1.584, pursuer_6/loss=1.681, pursuer_7/loss=1.713, rew=336.32]                           


Best Saved
Epoch #233: test_reward: 497.475500 ± 161.005507, best_reward: 497.475500 ± 161.005507 in #233


Epoch #234: 20001it [00:56, 354.55it/s, env_step=4680000, len=4000, n/ep=1, n/st=400, pursuer_0/loss=1.427, pursuer_1/loss=1.534, pursuer_2/loss=1.547, pursuer_3/loss=1.701, pursuer_4/loss=1.527, pursuer_5/loss=1.683, pursuer_6/loss=1.743, pursuer_7/loss=1.731, rew=175.27]                           


Epoch #234: test_reward: 328.284437 ± 217.561967, best_reward: 497.475500 ± 161.005507 in #233


Epoch #235: 20001it [00:57, 348.88it/s, env_step=4700000, len=1988, n/ep=2, n/st=400, pursuer_0/loss=1.273, pursuer_1/loss=1.422, pursuer_2/loss=1.464, pursuer_3/loss=1.616, pursuer_4/loss=1.510, pursuer_5/loss=1.448, pursuer_6/loss=1.445, pursuer_7/loss=1.635, rew=448.56]                           


Epoch #235: test_reward: 454.717437 ± 152.977625, best_reward: 497.475500 ± 161.005507 in #233


Epoch #236: 20001it [00:57, 347.94it/s, env_step=4720000, len=2392, n/ep=0, n/st=400, pursuer_0/loss=1.373, pursuer_1/loss=1.472, pursuer_2/loss=1.614, pursuer_3/loss=1.698, pursuer_4/loss=1.511, pursuer_5/loss=1.400, pursuer_6/loss=1.562, pursuer_7/loss=1.678, rew=286.05]                           


Epoch #236: test_reward: 229.941875 ± 166.995050, best_reward: 497.475500 ± 161.005507 in #233


Epoch #237: 20001it [00:56, 354.05it/s, env_step=4740000, len=3976, n/ep=0, n/st=400, pursuer_0/loss=1.395, pursuer_1/loss=1.484, pursuer_2/loss=1.464, pursuer_3/loss=1.619, pursuer_4/loss=1.403, pursuer_5/loss=1.431, pursuer_6/loss=1.490, pursuer_7/loss=1.627, rew=227.88]                           


Epoch #237: test_reward: 363.522938 ± 156.461988, best_reward: 497.475500 ± 161.005507 in #233


Epoch #238: 20001it [00:56, 353.52it/s, env_step=4760000, len=1864, n/ep=0, n/st=400, pursuer_0/loss=1.478, pursuer_1/loss=1.595, pursuer_2/loss=1.624, pursuer_3/loss=1.634, pursuer_4/loss=1.418, pursuer_5/loss=1.605, pursuer_6/loss=1.760, pursuer_7/loss=1.772, rew=308.33]                           


Epoch #238: test_reward: 442.608563 ± 207.392420, best_reward: 497.475500 ± 161.005507 in #233


Epoch #239: 20001it [00:57, 350.55it/s, env_step=4780000, len=2912, n/ep=0, n/st=400, pursuer_0/loss=1.276, pursuer_1/loss=1.688, pursuer_2/loss=1.530, pursuer_3/loss=1.785, pursuer_4/loss=1.337, pursuer_5/loss=1.580, pursuer_6/loss=1.538, pursuer_7/loss=1.679, rew=420.55]                           


Epoch #239: test_reward: 223.218687 ± 143.166041, best_reward: 497.475500 ± 161.005507 in #233


Epoch #240: 20001it [00:57, 345.95it/s, env_step=4800000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.208, pursuer_1/loss=1.390, pursuer_2/loss=1.349, pursuer_3/loss=1.434, pursuer_4/loss=1.447, pursuer_5/loss=1.413, pursuer_6/loss=1.410, pursuer_7/loss=1.548, rew=109.31]                           


Epoch #240: test_reward: 248.869625 ± 115.524962, best_reward: 497.475500 ± 161.005507 in #233


Epoch #241: 20001it [00:56, 354.22it/s, env_step=4820000, len=3872, n/ep=0, n/st=400, pursuer_0/loss=1.360, pursuer_1/loss=1.447, pursuer_2/loss=1.547, pursuer_3/loss=1.583, pursuer_4/loss=1.501, pursuer_5/loss=1.421, pursuer_6/loss=1.525, pursuer_7/loss=1.586, rew=214.14]                           


Epoch #241: test_reward: 378.560938 ± 182.073574, best_reward: 497.475500 ± 161.005507 in #233


Epoch #242: 20001it [00:57, 348.96it/s, env_step=4840000, len=1024, n/ep=0, n/st=400, pursuer_0/loss=1.323, pursuer_1/loss=1.239, pursuer_2/loss=1.255, pursuer_3/loss=1.369, pursuer_4/loss=1.276, pursuer_5/loss=1.272, pursuer_6/loss=1.305, pursuer_7/loss=1.410, rew=497.13]                           


Epoch #242: test_reward: 171.182375 ± 122.132895, best_reward: 497.475500 ± 161.005507 in #233


Epoch #243: 20001it [00:57, 348.98it/s, env_step=4860000, len=2320, n/ep=0, n/st=400, pursuer_0/loss=1.300, pursuer_1/loss=1.125, pursuer_2/loss=1.388, pursuer_3/loss=1.425, pursuer_4/loss=1.405, pursuer_5/loss=1.336, pursuer_6/loss=1.298, pursuer_7/loss=1.529, rew=393.44]                           


Epoch #243: test_reward: 372.613938 ± 145.563197, best_reward: 497.475500 ± 161.005507 in #233


Epoch #244: 20001it [00:55, 359.33it/s, env_step=4880000, len=3088, n/ep=0, n/st=400, pursuer_0/loss=1.194, pursuer_1/loss=1.271, pursuer_2/loss=1.244, pursuer_3/loss=1.431, pursuer_4/loss=1.236, pursuer_5/loss=1.271, pursuer_6/loss=1.336, pursuer_7/loss=1.275, rew=450.48]                           


Epoch #244: test_reward: 292.376812 ± 132.894738, best_reward: 497.475500 ± 161.005507 in #233


Epoch #245: 20001it [00:58, 343.81it/s, env_step=4900000, len=1008, n/ep=0, n/st=400, pursuer_0/loss=1.248, pursuer_1/loss=1.461, pursuer_2/loss=1.259, pursuer_3/loss=1.447, pursuer_4/loss=1.228, pursuer_5/loss=1.449, pursuer_6/loss=1.514, pursuer_7/loss=1.399, rew=404.09]                           


Epoch #245: test_reward: 239.083937 ± 140.069378, best_reward: 497.475500 ± 161.005507 in #233


Epoch #246: 20001it [00:56, 351.54it/s, env_step=4920000, len=1608, n/ep=0, n/st=400, pursuer_0/loss=1.270, pursuer_1/loss=1.332, pursuer_2/loss=1.431, pursuer_3/loss=1.521, pursuer_4/loss=1.349, pursuer_5/loss=1.376, pursuer_6/loss=1.402, pursuer_7/loss=1.581, rew=363.90]                           


Epoch #246: test_reward: 308.233563 ± 155.571162, best_reward: 497.475500 ± 161.005507 in #233


Epoch #247: 20001it [00:57, 345.85it/s, env_step=4940000, len=1128, n/ep=0, n/st=400, pursuer_0/loss=1.334, pursuer_1/loss=1.363, pursuer_2/loss=1.289, pursuer_3/loss=1.491, pursuer_4/loss=1.312, pursuer_5/loss=1.267, pursuer_6/loss=1.378, pursuer_7/loss=1.531, rew=675.83]                           


Epoch #247: test_reward: 281.159375 ± 159.347132, best_reward: 497.475500 ± 161.005507 in #233


Epoch #248: 20001it [00:56, 356.33it/s, env_step=4960000, len=872, n/ep=0, n/st=400, pursuer_0/loss=1.227, pursuer_1/loss=1.343, pursuer_2/loss=1.294, pursuer_3/loss=1.377, pursuer_4/loss=1.089, pursuer_5/loss=1.254, pursuer_6/loss=1.343, pursuer_7/loss=1.423, rew=702.37]                            


Epoch #248: test_reward: 242.405187 ± 182.749802, best_reward: 497.475500 ± 161.005507 in #233


Epoch #249: 20001it [00:56, 352.84it/s, env_step=4980000, len=1192, n/ep=0, n/st=400, pursuer_0/loss=1.236, pursuer_1/loss=1.209, pursuer_2/loss=1.306, pursuer_3/loss=1.372, pursuer_4/loss=1.257, pursuer_5/loss=1.219, pursuer_6/loss=1.387, pursuer_7/loss=1.579, rew=580.74]                           


Epoch #249: test_reward: 311.180813 ± 142.180187, best_reward: 497.475500 ± 161.005507 in #233


Epoch #250: 20001it [00:56, 351.92it/s, env_step=5000000, len=2232, n/ep=0, n/st=400, pursuer_0/loss=1.361, pursuer_1/loss=1.492, pursuer_2/loss=1.563, pursuer_3/loss=1.580, pursuer_4/loss=1.373, pursuer_5/loss=1.430, pursuer_6/loss=1.394, pursuer_7/loss=1.663, rew=311.65]                           


Epoch #250: test_reward: 290.023250 ± 172.204482, best_reward: 497.475500 ± 161.005507 in #233


Epoch #251: 20001it [00:56, 355.41it/s, env_step=5020000, len=2408, n/ep=0, n/st=400, pursuer_0/loss=1.203, pursuer_1/loss=1.365, pursuer_2/loss=1.460, pursuer_3/loss=1.521, pursuer_4/loss=1.221, pursuer_5/loss=1.416, pursuer_6/loss=1.493, pursuer_7/loss=1.430, rew=349.67]                           


Epoch #251: test_reward: 443.295563 ± 162.553432, best_reward: 497.475500 ± 161.005507 in #233


Epoch #252: 20001it [00:56, 355.00it/s, env_step=5040000, len=920, n/ep=1, n/st=400, pursuer_0/loss=1.261, pursuer_1/loss=1.587, pursuer_2/loss=1.454, pursuer_3/loss=1.612, pursuer_4/loss=1.307, pursuer_5/loss=1.366, pursuer_6/loss=1.334, pursuer_7/loss=1.573, rew=442.58]                            


Epoch #252: test_reward: 364.735375 ± 187.898215, best_reward: 497.475500 ± 161.005507 in #233


Epoch #253: 20001it [00:57, 344.98it/s, env_step=5060000, len=712, n/ep=0, n/st=400, pursuer_0/loss=1.294, pursuer_1/loss=1.462, pursuer_2/loss=1.438, pursuer_3/loss=1.606, pursuer_4/loss=1.484, pursuer_5/loss=1.387, pursuer_6/loss=1.380, pursuer_7/loss=1.573, rew=562.02]                            


Epoch #253: test_reward: 259.244125 ± 145.968444, best_reward: 497.475500 ± 161.005507 in #233


Epoch #254: 20001it [00:55, 357.78it/s, env_step=5080000, len=1144, n/ep=0, n/st=400, pursuer_0/loss=1.407, pursuer_1/loss=1.475, pursuer_2/loss=1.561, pursuer_3/loss=1.557, pursuer_4/loss=1.417, pursuer_5/loss=1.459, pursuer_6/loss=1.400, pursuer_7/loss=1.578, rew=390.05]                           


Epoch #254: test_reward: 329.841188 ± 141.867691, best_reward: 497.475500 ± 161.005507 in #233


Epoch #255: 20001it [00:58, 343.42it/s, env_step=5100000, len=1224, n/ep=0, n/st=400, pursuer_0/loss=1.237, pursuer_1/loss=1.226, pursuer_2/loss=1.572, pursuer_3/loss=1.527, pursuer_4/loss=1.372, pursuer_5/loss=1.497, pursuer_6/loss=1.383, pursuer_7/loss=1.556, rew=497.66]                           


Epoch #255: test_reward: 383.929125 ± 128.738405, best_reward: 497.475500 ± 161.005507 in #233


Epoch #256: 20001it [00:57, 349.59it/s, env_step=5120000, len=1344, n/ep=0, n/st=400, pursuer_0/loss=1.262, pursuer_1/loss=1.527, pursuer_2/loss=1.522, pursuer_3/loss=1.625, pursuer_4/loss=1.511, pursuer_5/loss=1.511, pursuer_6/loss=1.536, pursuer_7/loss=1.700, rew=414.19]                           


Epoch #256: test_reward: 303.785500 ± 177.482395, best_reward: 497.475500 ± 161.005507 in #233


Epoch #257: 20001it [00:57, 348.55it/s, env_step=5140000, len=2448, n/ep=0, n/st=400, pursuer_0/loss=1.435, pursuer_1/loss=1.554, pursuer_2/loss=1.431, pursuer_3/loss=1.618, pursuer_4/loss=1.482, pursuer_5/loss=1.559, pursuer_6/loss=1.491, pursuer_7/loss=1.799, rew=289.06]                           


Epoch #257: test_reward: 387.655500 ± 115.111788, best_reward: 497.475500 ± 161.005507 in #233


Epoch #258: 20001it [00:57, 348.63it/s, env_step=5160000, len=1784, n/ep=0, n/st=400, pursuer_0/loss=1.427, pursuer_1/loss=1.420, pursuer_2/loss=1.523, pursuer_3/loss=1.524, pursuer_4/loss=1.444, pursuer_5/loss=1.484, pursuer_6/loss=1.507, pursuer_7/loss=1.718, rew=376.86]                           


Epoch #258: test_reward: 324.605938 ± 165.276508, best_reward: 497.475500 ± 161.005507 in #233


Epoch #259: 20001it [00:56, 351.78it/s, env_step=5180000, len=3856, n/ep=1, n/st=400, pursuer_0/loss=1.429, pursuer_1/loss=1.542, pursuer_2/loss=1.625, pursuer_3/loss=1.630, pursuer_4/loss=1.435, pursuer_5/loss=1.507, pursuer_6/loss=1.542, pursuer_7/loss=1.727, rew=258.46]                           


Epoch #259: test_reward: 312.713812 ± 147.154559, best_reward: 497.475500 ± 161.005507 in #233


Epoch #260: 20001it [00:57, 348.31it/s, env_step=5200000, len=1752, n/ep=0, n/st=400, pursuer_0/loss=1.414, pursuer_1/loss=1.444, pursuer_2/loss=1.564, pursuer_3/loss=1.613, pursuer_4/loss=1.399, pursuer_5/loss=1.428, pursuer_6/loss=1.560, pursuer_7/loss=1.517, rew=334.34]                           


Epoch #260: test_reward: 376.706812 ± 100.200301, best_reward: 497.475500 ± 161.005507 in #233


Epoch #261: 20001it [00:56, 351.02it/s, env_step=5220000, len=3376, n/ep=1, n/st=400, pursuer_0/loss=1.324, pursuer_1/loss=1.273, pursuer_2/loss=1.389, pursuer_3/loss=1.422, pursuer_4/loss=1.320, pursuer_5/loss=1.345, pursuer_6/loss=1.481, pursuer_7/loss=1.649, rew=227.79]                           


Epoch #261: test_reward: 449.671250 ± 200.035330, best_reward: 497.475500 ± 161.005507 in #233


Epoch #262: 20001it [00:58, 344.40it/s, env_step=5240000, len=2408, n/ep=1, n/st=400, pursuer_0/loss=1.444, pursuer_1/loss=1.401, pursuer_2/loss=1.524, pursuer_3/loss=1.457, pursuer_4/loss=1.483, pursuer_5/loss=1.484, pursuer_6/loss=1.524, pursuer_7/loss=1.857, rew=384.69]                           


Epoch #262: test_reward: 346.982000 ± 129.097106, best_reward: 497.475500 ± 161.005507 in #233


Epoch #263: 20001it [00:57, 344.91it/s, env_step=5260000, len=1208, n/ep=0, n/st=400, pursuer_0/loss=1.393, pursuer_1/loss=1.455, pursuer_2/loss=1.556, pursuer_3/loss=1.564, pursuer_4/loss=1.505, pursuer_5/loss=1.528, pursuer_6/loss=1.407, pursuer_7/loss=1.758, rew=432.93]                           


Epoch #263: test_reward: 396.058312 ± 161.809745, best_reward: 497.475500 ± 161.005507 in #233


Epoch #264: 20001it [00:58, 343.09it/s, env_step=5280000, len=1032, n/ep=0, n/st=400, pursuer_0/loss=1.357, pursuer_1/loss=1.621, pursuer_2/loss=1.555, pursuer_3/loss=1.552, pursuer_4/loss=1.491, pursuer_5/loss=1.512, pursuer_6/loss=1.650, pursuer_7/loss=1.620, rew=620.59]                           


Epoch #264: test_reward: 410.215812 ± 97.224530, best_reward: 497.475500 ± 161.005507 in #233


Epoch #265: 20001it [00:56, 350.91it/s, env_step=5300000, len=1360, n/ep=0, n/st=400, pursuer_0/loss=1.376, pursuer_1/loss=1.431, pursuer_2/loss=1.440, pursuer_3/loss=1.710, pursuer_4/loss=1.577, pursuer_5/loss=1.504, pursuer_6/loss=1.513, pursuer_7/loss=1.785, rew=429.02]                           


Epoch #265: test_reward: 303.240250 ± 156.918026, best_reward: 497.475500 ± 161.005507 in #233


Epoch #266: 20001it [00:56, 355.00it/s, env_step=5320000, len=1976, n/ep=0, n/st=400, pursuer_0/loss=1.385, pursuer_1/loss=1.479, pursuer_2/loss=1.572, pursuer_3/loss=1.641, pursuer_4/loss=1.431, pursuer_5/loss=1.666, pursuer_6/loss=1.517, pursuer_7/loss=1.793, rew=377.66]                           


Epoch #266: test_reward: 264.571437 ± 146.157431, best_reward: 497.475500 ± 161.005507 in #233


Epoch #267: 20001it [00:59, 333.37it/s, env_step=5340000, len=1384, n/ep=0, n/st=400, pursuer_0/loss=1.408, pursuer_1/loss=1.673, pursuer_2/loss=1.607, pursuer_3/loss=1.659, pursuer_4/loss=1.576, pursuer_5/loss=1.727, pursuer_6/loss=1.834, pursuer_7/loss=1.928, rew=400.41]                           


Epoch #267: test_reward: 393.265937 ± 171.745337, best_reward: 497.475500 ± 161.005507 in #233


Epoch #268: 20001it [01:00, 331.07it/s, env_step=5360000, len=1320, n/ep=0, n/st=400, pursuer_0/loss=1.455, pursuer_1/loss=1.610, pursuer_2/loss=1.516, pursuer_3/loss=1.684, pursuer_4/loss=1.631, pursuer_5/loss=1.585, pursuer_6/loss=1.692, pursuer_7/loss=1.839, rew=491.22]                           


Epoch #268: test_reward: 373.979250 ± 195.993636, best_reward: 497.475500 ± 161.005507 in #233


Epoch #269: 20001it [00:59, 335.40it/s, env_step=5380000, len=1688, n/ep=0, n/st=400, pursuer_0/loss=1.535, pursuer_1/loss=1.597, pursuer_2/loss=1.718, pursuer_3/loss=1.671, pursuer_4/loss=1.713, pursuer_5/loss=1.742, pursuer_6/loss=1.984, pursuer_7/loss=1.976, rew=397.26]                           


Epoch #269: test_reward: 434.289562 ± 130.390335, best_reward: 497.475500 ± 161.005507 in #233


Epoch #270: 20001it [00:59, 333.61it/s, env_step=5400000, len=1488, n/ep=0, n/st=400, pursuer_0/loss=1.577, pursuer_1/loss=1.767, pursuer_2/loss=1.618, pursuer_3/loss=1.839, pursuer_4/loss=1.754, pursuer_5/loss=1.820, pursuer_6/loss=1.709, pursuer_7/loss=1.926, rew=415.56]                           


Best Saved
Epoch #270: test_reward: 529.346250 ± 128.710129, best_reward: 529.346250 ± 128.710129 in #270


Epoch #271: 20001it [00:59, 337.16it/s, env_step=5420000, len=864, n/ep=0, n/st=400, pursuer_0/loss=1.656, pursuer_1/loss=1.602, pursuer_2/loss=1.765, pursuer_3/loss=1.822, pursuer_4/loss=1.742, pursuer_5/loss=1.918, pursuer_6/loss=1.782, pursuer_7/loss=2.014, rew=502.51]                            


Epoch #271: test_reward: 358.542812 ± 187.123615, best_reward: 529.346250 ± 128.710129 in #270


Epoch #272: 20001it [01:00, 332.38it/s, env_step=5440000, len=920, n/ep=0, n/st=400, pursuer_0/loss=1.563, pursuer_1/loss=1.667, pursuer_2/loss=1.727, pursuer_3/loss=1.837, pursuer_4/loss=1.766, pursuer_5/loss=1.695, pursuer_6/loss=1.774, pursuer_7/loss=1.843, rew=637.20]                            


Epoch #272: test_reward: 383.476750 ± 178.264079, best_reward: 529.346250 ± 128.710129 in #270


Epoch #273: 20001it [00:59, 338.45it/s, env_step=5460000, len=2256, n/ep=0, n/st=400, pursuer_0/loss=1.677, pursuer_1/loss=1.682, pursuer_2/loss=1.889, pursuer_3/loss=1.922, pursuer_4/loss=1.832, pursuer_5/loss=1.851, pursuer_6/loss=1.889, pursuer_7/loss=2.024, rew=440.81]                           


Epoch #273: test_reward: 399.193250 ± 168.162866, best_reward: 529.346250 ± 128.710129 in #270


Epoch #274: 20001it [00:59, 336.49it/s, env_step=5480000, len=1296, n/ep=0, n/st=400, pursuer_0/loss=1.631, pursuer_1/loss=1.632, pursuer_2/loss=1.714, pursuer_3/loss=1.834, pursuer_4/loss=1.720, pursuer_5/loss=1.721, pursuer_6/loss=1.674, pursuer_7/loss=1.898, rew=450.03]                           


Epoch #274: test_reward: 241.171937 ± 134.826147, best_reward: 529.346250 ± 128.710129 in #270


Epoch #275: 20001it [00:58, 341.04it/s, env_step=5500000, len=2360, n/ep=0, n/st=400, pursuer_0/loss=1.605, pursuer_1/loss=1.789, pursuer_2/loss=1.880, pursuer_3/loss=1.961, pursuer_4/loss=1.763, pursuer_5/loss=1.766, pursuer_6/loss=1.980, pursuer_7/loss=1.987, rew=264.10]                           


Epoch #275: test_reward: 326.082938 ± 169.539788, best_reward: 529.346250 ± 128.710129 in #270


Epoch #276: 20001it [00:58, 339.40it/s, env_step=5520000, len=1300, n/ep=2, n/st=400, pursuer_0/loss=1.741, pursuer_1/loss=1.637, pursuer_2/loss=1.769, pursuer_3/loss=1.788, pursuer_4/loss=1.806, pursuer_5/loss=1.769, pursuer_6/loss=1.956, pursuer_7/loss=2.005, rew=678.82]                           


Epoch #276: test_reward: 303.017000 ± 186.521649, best_reward: 529.346250 ± 128.710129 in #270


Epoch #277: 20001it [00:59, 337.12it/s, env_step=5540000, len=1992, n/ep=1, n/st=400, pursuer_0/loss=1.679, pursuer_1/loss=1.682, pursuer_2/loss=1.769, pursuer_3/loss=1.908, pursuer_4/loss=1.736, pursuer_5/loss=1.787, pursuer_6/loss=1.959, pursuer_7/loss=1.891, rew=512.05]                           


Epoch #277: test_reward: 429.323813 ± 154.211305, best_reward: 529.346250 ± 128.710129 in #270


Epoch #278: 20001it [00:59, 336.09it/s, env_step=5560000, len=1328, n/ep=0, n/st=400, pursuer_0/loss=1.635, pursuer_1/loss=1.651, pursuer_2/loss=1.753, pursuer_3/loss=1.719, pursuer_4/loss=1.820, pursuer_5/loss=1.758, pursuer_6/loss=1.741, pursuer_7/loss=1.905, rew=696.77]                           


Epoch #278: test_reward: 467.596500 ± 133.397887, best_reward: 529.346250 ± 128.710129 in #270


Epoch #279: 20001it [00:59, 338.27it/s, env_step=5580000, len=1656, n/ep=1, n/st=400, pursuer_0/loss=1.734, pursuer_1/loss=1.750, pursuer_2/loss=1.879, pursuer_3/loss=1.887, pursuer_4/loss=1.802, pursuer_5/loss=1.740, pursuer_6/loss=1.771, pursuer_7/loss=2.093, rew=543.52]                           


Epoch #279: test_reward: 316.834063 ± 168.545573, best_reward: 529.346250 ± 128.710129 in #270


Epoch #280: 20001it [00:59, 335.46it/s, env_step=5600000, len=1472, n/ep=1, n/st=400, pursuer_0/loss=1.729, pursuer_1/loss=1.850, pursuer_2/loss=1.950, pursuer_3/loss=1.757, pursuer_4/loss=1.805, pursuer_5/loss=1.707, pursuer_6/loss=1.907, pursuer_7/loss=2.188, rew=515.34]                           


Epoch #280: test_reward: 379.543750 ± 162.361435, best_reward: 529.346250 ± 128.710129 in #270


Epoch #281: 20001it [00:59, 335.19it/s, env_step=5620000, len=1632, n/ep=0, n/st=400, pursuer_0/loss=1.781, pursuer_1/loss=1.721, pursuer_2/loss=1.838, pursuer_3/loss=2.054, pursuer_4/loss=1.859, pursuer_5/loss=1.892, pursuer_6/loss=2.175, pursuer_7/loss=2.049, rew=616.90]                           


Epoch #281: test_reward: 327.753563 ± 164.501549, best_reward: 529.346250 ± 128.710129 in #270


Epoch #282: 20001it [00:59, 334.86it/s, env_step=5640000, len=1160, n/ep=0, n/st=400, pursuer_0/loss=1.736, pursuer_1/loss=1.737, pursuer_2/loss=1.796, pursuer_3/loss=1.974, pursuer_4/loss=1.751, pursuer_5/loss=1.728, pursuer_6/loss=2.066, pursuer_7/loss=2.152, rew=422.09]                           


Epoch #282: test_reward: 350.883938 ± 141.477800, best_reward: 529.346250 ± 128.710129 in #270


Epoch #283: 20001it [00:56, 356.44it/s, env_step=5660000, len=2792, n/ep=0, n/st=400, pursuer_0/loss=1.583, pursuer_1/loss=1.647, pursuer_2/loss=1.651, pursuer_3/loss=1.805, pursuer_4/loss=1.895, pursuer_5/loss=1.795, pursuer_6/loss=1.869, pursuer_7/loss=2.043, rew=310.07]                           


Epoch #283: test_reward: 323.387562 ± 245.593899, best_reward: 529.346250 ± 128.710129 in #270


Epoch #284: 20001it [01:06, 299.73it/s, env_step=5680000, len=1232, n/ep=0, n/st=400, pursuer_0/loss=1.696, pursuer_1/loss=1.809, pursuer_2/loss=1.696, pursuer_3/loss=1.806, pursuer_4/loss=1.772, pursuer_5/loss=1.752, pursuer_6/loss=1.898, pursuer_7/loss=2.006, rew=504.90]                           


Epoch #284: test_reward: 475.739500 ± 173.450027, best_reward: 529.346250 ± 128.710129 in #270


Epoch #285: 20001it [01:03, 314.05it/s, env_step=5700000, len=2504, n/ep=0, n/st=400, pursuer_0/loss=1.589, pursuer_1/loss=1.663, pursuer_2/loss=1.800, pursuer_3/loss=1.912, pursuer_4/loss=1.690, pursuer_5/loss=1.906, pursuer_6/loss=1.896, pursuer_7/loss=2.139, rew=429.51]                           


Epoch #285: test_reward: 488.440000 ± 157.737212, best_reward: 529.346250 ± 128.710129 in #270


Epoch #286: 20001it [01:08, 290.41it/s, env_step=5720000, len=1560, n/ep=0, n/st=400, pursuer_0/loss=1.722, pursuer_1/loss=1.808, pursuer_2/loss=1.693, pursuer_3/loss=1.897, pursuer_4/loss=1.746, pursuer_5/loss=1.888, pursuer_6/loss=1.800, pursuer_7/loss=1.882, rew=429.64]                           


Epoch #286: test_reward: 372.929500 ± 183.658512, best_reward: 529.346250 ± 128.710129 in #270


Epoch #287: 20001it [01:07, 295.22it/s, env_step=5740000, len=1520, n/ep=0, n/st=400, pursuer_0/loss=1.623, pursuer_1/loss=1.621, pursuer_2/loss=1.599, pursuer_3/loss=1.841, pursuer_4/loss=1.734, pursuer_5/loss=1.884, pursuer_6/loss=1.704, pursuer_7/loss=2.038, rew=429.49]                           


Epoch #287: test_reward: 428.514063 ± 183.232966, best_reward: 529.346250 ± 128.710129 in #270


Epoch #288: 20001it [01:11, 278.76it/s, env_step=5760000, len=1928, n/ep=0, n/st=400, pursuer_0/loss=1.569, pursuer_1/loss=1.588, pursuer_2/loss=1.611, pursuer_3/loss=1.830, pursuer_4/loss=1.655, pursuer_5/loss=1.877, pursuer_6/loss=1.870, pursuer_7/loss=2.106, rew=466.11]                           


Epoch #288: test_reward: 354.483687 ± 216.907867, best_reward: 529.346250 ± 128.710129 in #270


Epoch #289: 20001it [01:14, 267.28it/s, env_step=5780000, len=2256, n/ep=0, n/st=400, pursuer_0/loss=1.668, pursuer_1/loss=1.591, pursuer_2/loss=1.788, pursuer_3/loss=1.819, pursuer_4/loss=1.826, pursuer_5/loss=1.858, pursuer_6/loss=1.975, pursuer_7/loss=1.940, rew=333.72]                           


Epoch #289: test_reward: 425.452000 ± 224.898354, best_reward: 529.346250 ± 128.710129 in #270


Epoch #290: 20001it [01:16, 260.11it/s, env_step=5800000, len=2420, n/ep=2, n/st=400, pursuer_0/loss=1.772, pursuer_1/loss=1.688, pursuer_2/loss=1.823, pursuer_3/loss=1.894, pursuer_4/loss=1.757, pursuer_5/loss=1.848, pursuer_6/loss=1.853, pursuer_7/loss=2.051, rew=409.98]                           


Epoch #290: test_reward: 470.333938 ± 226.375744, best_reward: 529.346250 ± 128.710129 in #270


Epoch #291: 20001it [01:14, 267.34it/s, env_step=5820000, len=1568, n/ep=0, n/st=400, pursuer_0/loss=1.533, pursuer_1/loss=1.658, pursuer_2/loss=1.672, pursuer_3/loss=1.633, pursuer_4/loss=1.684, pursuer_5/loss=1.843, pursuer_6/loss=1.756, pursuer_7/loss=2.021, rew=663.34]                           


Epoch #291: test_reward: 426.990000 ± 205.522738, best_reward: 529.346250 ± 128.710129 in #270


Epoch #292: 20001it [01:10, 285.39it/s, env_step=5840000, len=1928, n/ep=0, n/st=400, pursuer_0/loss=1.628, pursuer_1/loss=1.523, pursuer_2/loss=1.791, pursuer_3/loss=1.748, pursuer_4/loss=1.777, pursuer_5/loss=1.939, pursuer_6/loss=1.895, pursuer_7/loss=2.063, rew=646.66]                           


Epoch #292: test_reward: 363.475813 ± 171.946685, best_reward: 529.346250 ± 128.710129 in #270


Epoch #293: 20001it [01:12, 277.21it/s, env_step=5860000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.356, pursuer_1/loss=1.499, pursuer_2/loss=1.638, pursuer_3/loss=1.644, pursuer_4/loss=1.662, pursuer_5/loss=1.750, pursuer_6/loss=1.715, pursuer_7/loss=1.793, rew=129.93]                           


Epoch #293: test_reward: 481.884188 ± 164.932918, best_reward: 529.346250 ± 128.710129 in #270


Epoch #294: 20001it [01:16, 262.82it/s, env_step=5880000, len=1364, n/ep=0, n/st=400, pursuer_0/loss=1.545, pursuer_1/loss=1.579, pursuer_2/loss=1.591, pursuer_3/loss=1.831, pursuer_4/loss=1.733, pursuer_5/loss=1.702, pursuer_6/loss=1.839, pursuer_7/loss=1.925, rew=490.62]                           


Epoch #294: test_reward: 289.602813 ± 180.893669, best_reward: 529.346250 ± 128.710129 in #270


Epoch #295: 20001it [01:20, 249.52it/s, env_step=5900000, len=1776, n/ep=0, n/st=400, pursuer_0/loss=1.579, pursuer_1/loss=1.555, pursuer_2/loss=1.584, pursuer_3/loss=1.786, pursuer_4/loss=1.621, pursuer_5/loss=1.699, pursuer_6/loss=1.894, pursuer_7/loss=1.959, rew=377.43]                           


Epoch #295: test_reward: 413.076125 ± 194.884376, best_reward: 529.346250 ± 128.710129 in #270


Epoch #296: 20001it [01:16, 262.79it/s, env_step=5920000, len=2984, n/ep=0, n/st=400, pursuer_0/loss=1.605, pursuer_1/loss=1.712, pursuer_2/loss=1.583, pursuer_3/loss=1.873, pursuer_4/loss=1.903, pursuer_5/loss=1.650, pursuer_6/loss=1.741, pursuer_7/loss=2.008, rew=472.10]                           


Epoch #296: test_reward: 396.823250 ± 198.660010, best_reward: 529.346250 ± 128.710129 in #270


Epoch #297: 20001it [01:16, 261.36it/s, env_step=5940000, len=1680, n/ep=0, n/st=400, pursuer_0/loss=1.501, pursuer_1/loss=1.575, pursuer_2/loss=1.736, pursuer_3/loss=1.810, pursuer_4/loss=1.651, pursuer_5/loss=1.798, pursuer_6/loss=1.674, pursuer_7/loss=1.981, rew=590.54]                           


Best Saved
Epoch #297: test_reward: 534.700625 ± 166.211455, best_reward: 534.700625 ± 166.211455 in #297


Epoch #298: 20001it [01:11, 280.09it/s, env_step=5960000, len=1256, n/ep=0, n/st=400, pursuer_0/loss=1.467, pursuer_1/loss=1.759, pursuer_2/loss=1.705, pursuer_3/loss=1.886, pursuer_4/loss=1.747, pursuer_5/loss=1.834, pursuer_6/loss=1.964, pursuer_7/loss=2.111, rew=693.20]                           


Best Saved
Epoch #298: test_reward: 567.282688 ± 144.595916, best_reward: 567.282688 ± 144.595916 in #298


Epoch #299: 20001it [01:11, 281.21it/s, env_step=5980000, len=976, n/ep=0, n/st=400, pursuer_0/loss=1.535, pursuer_1/loss=1.633, pursuer_2/loss=1.622, pursuer_3/loss=1.872, pursuer_4/loss=1.763, pursuer_5/loss=1.724, pursuer_6/loss=1.812, pursuer_7/loss=1.895, rew=610.48]                            


Epoch #299: test_reward: 418.550875 ± 221.613508, best_reward: 567.282688 ± 144.595916 in #298


Epoch #300: 20001it [01:07, 297.70it/s, env_step=6000000, len=1800, n/ep=0, n/st=400, pursuer_0/loss=1.766, pursuer_1/loss=1.767, pursuer_2/loss=1.690, pursuer_3/loss=1.861, pursuer_4/loss=1.754, pursuer_5/loss=1.814, pursuer_6/loss=1.952, pursuer_7/loss=1.852, rew=423.81]                           


Epoch #300: test_reward: 464.106563 ± 207.313383, best_reward: 567.282688 ± 144.595916 in #298


Epoch #301: 20001it [01:09, 288.62it/s, env_step=6020000, len=2700, n/ep=0, n/st=400, pursuer_0/loss=1.618, pursuer_1/loss=1.618, pursuer_2/loss=1.743, pursuer_3/loss=1.775, pursuer_4/loss=1.629, pursuer_5/loss=1.768, pursuer_6/loss=1.806, pursuer_7/loss=1.719, rew=387.12]                           


Epoch #301: test_reward: 371.633187 ± 204.373316, best_reward: 567.282688 ± 144.595916 in #298


Epoch #302: 20001it [01:08, 292.07it/s, env_step=6040000, len=3104, n/ep=0, n/st=400, pursuer_0/loss=1.546, pursuer_1/loss=1.640, pursuer_2/loss=1.667, pursuer_3/loss=1.665, pursuer_4/loss=1.731, pursuer_5/loss=1.871, pursuer_6/loss=1.709, pursuer_7/loss=2.032, rew=291.19]                           


Epoch #302: test_reward: 447.260000 ± 204.144028, best_reward: 567.282688 ± 144.595916 in #298


Epoch #303: 20001it [01:07, 297.87it/s, env_step=6060000, len=1712, n/ep=0, n/st=400, pursuer_0/loss=1.587, pursuer_1/loss=1.687, pursuer_2/loss=1.572, pursuer_3/loss=1.867, pursuer_4/loss=1.551, pursuer_5/loss=1.712, pursuer_6/loss=1.747, pursuer_7/loss=1.961, rew=342.18]                           


Epoch #303: test_reward: 443.039312 ± 226.259269, best_reward: 567.282688 ± 144.595916 in #298


Epoch #304: 20001it [01:07, 298.45it/s, env_step=6080000, len=2120, n/ep=0, n/st=400, pursuer_0/loss=1.661, pursuer_1/loss=1.698, pursuer_2/loss=1.621, pursuer_3/loss=2.037, pursuer_4/loss=1.748, pursuer_5/loss=1.662, pursuer_6/loss=1.672, pursuer_7/loss=1.884, rew=491.86]                           


Epoch #304: test_reward: 408.817125 ± 167.783702, best_reward: 567.282688 ± 144.595916 in #298


Epoch #305: 20001it [01:05, 307.18it/s, env_step=6100000, len=1256, n/ep=0, n/st=400, pursuer_0/loss=1.447, pursuer_1/loss=1.599, pursuer_2/loss=1.764, pursuer_3/loss=1.648, pursuer_4/loss=1.621, pursuer_5/loss=1.625, pursuer_6/loss=1.751, pursuer_7/loss=1.762, rew=758.75]                           


Epoch #305: test_reward: 501.234125 ± 219.601319, best_reward: 567.282688 ± 144.595916 in #298


Epoch #306: 20001it [01:07, 297.23it/s, env_step=6120000, len=1728, n/ep=1, n/st=400, pursuer_0/loss=1.499, pursuer_1/loss=1.527, pursuer_2/loss=1.560, pursuer_3/loss=1.627, pursuer_4/loss=1.653, pursuer_5/loss=1.694, pursuer_6/loss=1.726, pursuer_7/loss=1.778, rew=598.18]                           


Epoch #306: test_reward: 398.035937 ± 172.819093, best_reward: 567.282688 ± 144.595916 in #298


Epoch #307: 20001it [01:09, 286.67it/s, env_step=6140000, len=1144, n/ep=0, n/st=400, pursuer_0/loss=1.630, pursuer_1/loss=1.525, pursuer_2/loss=1.688, pursuer_3/loss=1.959, pursuer_4/loss=1.703, pursuer_5/loss=1.810, pursuer_6/loss=1.759, pursuer_7/loss=1.791, rew=606.67]                           


Epoch #307: test_reward: 456.639750 ± 187.658473, best_reward: 567.282688 ± 144.595916 in #298


Epoch #308: 20001it [01:16, 260.08it/s, env_step=6160000, len=2192, n/ep=0, n/st=400, pursuer_0/loss=1.734, pursuer_1/loss=1.670, pursuer_2/loss=1.623, pursuer_3/loss=1.750, pursuer_4/loss=1.690, pursuer_5/loss=1.740, pursuer_6/loss=1.778, pursuer_7/loss=1.925, rew=362.54]                           


Epoch #308: test_reward: 420.480250 ± 125.413839, best_reward: 567.282688 ± 144.595916 in #298


Epoch #309: 20001it [01:07, 296.32it/s, env_step=6180000, len=720, n/ep=0, n/st=400, pursuer_0/loss=1.530, pursuer_1/loss=1.543, pursuer_2/loss=1.533, pursuer_3/loss=1.722, pursuer_4/loss=1.582, pursuer_5/loss=1.554, pursuer_6/loss=1.827, pursuer_7/loss=1.905, rew=460.95]                            


Epoch #309: test_reward: 497.892063 ± 127.590264, best_reward: 567.282688 ± 144.595916 in #298


Epoch #310: 20001it [01:08, 292.40it/s, env_step=6200000, len=1904, n/ep=0, n/st=400, pursuer_0/loss=1.544, pursuer_1/loss=1.663, pursuer_2/loss=1.502, pursuer_3/loss=1.550, pursuer_4/loss=1.578, pursuer_5/loss=1.577, pursuer_6/loss=1.785, pursuer_7/loss=1.749, rew=338.91]                           


Epoch #310: test_reward: 437.402938 ± 170.837485, best_reward: 567.282688 ± 144.595916 in #298


Epoch #311: 20001it [01:12, 277.39it/s, env_step=6220000, len=1096, n/ep=0, n/st=400, pursuer_0/loss=1.634, pursuer_1/loss=1.637, pursuer_2/loss=1.619, pursuer_3/loss=1.880, pursuer_4/loss=1.666, pursuer_5/loss=1.560, pursuer_6/loss=1.735, pursuer_7/loss=1.796, rew=630.03]                           


Epoch #311: test_reward: 402.244000 ± 211.187029, best_reward: 567.282688 ± 144.595916 in #298


Epoch #312: 20001it [01:22, 241.67it/s, env_step=6240000, len=2384, n/ep=0, n/st=400, pursuer_0/loss=1.595, pursuer_1/loss=1.554, pursuer_2/loss=1.538, pursuer_3/loss=1.709, pursuer_4/loss=1.557, pursuer_5/loss=1.810, pursuer_6/loss=1.768, pursuer_7/loss=1.735, rew=461.66]                           


Epoch #312: test_reward: 362.069875 ± 154.518780, best_reward: 567.282688 ± 144.595916 in #298


Epoch #313: 20001it [01:04, 309.60it/s, env_step=6260000, len=2808, n/ep=0, n/st=400, pursuer_0/loss=1.470, pursuer_1/loss=1.549, pursuer_2/loss=1.351, pursuer_3/loss=1.490, pursuer_4/loss=1.498, pursuer_5/loss=1.547, pursuer_6/loss=1.813, pursuer_7/loss=1.551, rew=417.75]                           


Epoch #313: test_reward: 380.772688 ± 166.554528, best_reward: 567.282688 ± 144.595916 in #298


Epoch #314: 20001it [01:25, 232.91it/s, env_step=6280000, len=2128, n/ep=0, n/st=400, pursuer_0/loss=1.635, pursuer_1/loss=1.613, pursuer_2/loss=1.588, pursuer_3/loss=1.711, pursuer_4/loss=1.567, pursuer_5/loss=1.671, pursuer_6/loss=1.841, pursuer_7/loss=2.041, rew=431.21]                           


Epoch #314: test_reward: 415.223125 ± 170.686222, best_reward: 567.282688 ± 144.595916 in #298


Epoch #315: 20001it [01:04, 311.24it/s, env_step=6300000, len=1640, n/ep=0, n/st=400, pursuer_0/loss=1.603, pursuer_1/loss=1.476, pursuer_2/loss=1.770, pursuer_3/loss=1.691, pursuer_4/loss=1.780, pursuer_5/loss=1.707, pursuer_6/loss=1.776, pursuer_7/loss=1.718, rew=564.13]                           


Epoch #315: test_reward: 275.778188 ± 180.899227, best_reward: 567.282688 ± 144.595916 in #298


Epoch #316: 20001it [01:02, 322.29it/s, env_step=6320000, len=1576, n/ep=0, n/st=400, pursuer_0/loss=1.534, pursuer_1/loss=1.664, pursuer_2/loss=1.702, pursuer_3/loss=1.715, pursuer_4/loss=1.598, pursuer_5/loss=1.758, pursuer_6/loss=1.723, pursuer_7/loss=1.830, rew=456.70]                           


Epoch #316: test_reward: 253.585438 ± 179.408198, best_reward: 567.282688 ± 144.595916 in #298


Epoch #317: 20001it [01:04, 310.53it/s, env_step=6340000, len=2624, n/ep=0, n/st=400, pursuer_0/loss=1.675, pursuer_1/loss=1.648, pursuer_2/loss=1.740, pursuer_3/loss=1.689, pursuer_4/loss=1.697, pursuer_5/loss=1.670, pursuer_6/loss=1.703, pursuer_7/loss=1.713, rew=368.71]                           


Epoch #317: test_reward: 325.879062 ± 149.408678, best_reward: 567.282688 ± 144.595916 in #298


Epoch #318: 20001it [01:04, 308.71it/s, env_step=6360000, len=2040, n/ep=0, n/st=400, pursuer_0/loss=1.573, pursuer_1/loss=1.754, pursuer_2/loss=1.729, pursuer_3/loss=1.853, pursuer_4/loss=1.665, pursuer_5/loss=1.774, pursuer_6/loss=1.826, pursuer_7/loss=1.629, rew=349.44]                           


Epoch #318: test_reward: 417.197063 ± 171.326506, best_reward: 567.282688 ± 144.595916 in #298


Epoch #319: 20001it [01:04, 310.27it/s, env_step=6380000, len=2008, n/ep=0, n/st=400, pursuer_0/loss=1.667, pursuer_1/loss=1.718, pursuer_2/loss=1.734, pursuer_3/loss=1.781, pursuer_4/loss=1.694, pursuer_5/loss=1.768, pursuer_6/loss=1.759, pursuer_7/loss=1.865, rew=392.37]                           


Epoch #319: test_reward: 318.689813 ± 179.513202, best_reward: 567.282688 ± 144.595916 in #298


Epoch #320: 20001it [01:04, 308.59it/s, env_step=6400000, len=840, n/ep=1, n/st=400, pursuer_0/loss=1.663, pursuer_1/loss=1.666, pursuer_2/loss=1.664, pursuer_3/loss=1.793, pursuer_4/loss=1.667, pursuer_5/loss=1.729, pursuer_6/loss=1.754, pursuer_7/loss=1.784, rew=594.66]                            


Epoch #320: test_reward: 300.071062 ± 167.020497, best_reward: 567.282688 ± 144.595916 in #298


Epoch #321: 20001it [01:06, 300.55it/s, env_step=6420000, len=1128, n/ep=0, n/st=400, pursuer_0/loss=1.359, pursuer_1/loss=1.557, pursuer_2/loss=1.586, pursuer_3/loss=1.756, pursuer_4/loss=1.595, pursuer_5/loss=1.627, pursuer_6/loss=1.503, pursuer_7/loss=1.765, rew=425.91]                           


Epoch #321: test_reward: 506.360063 ± 101.607402, best_reward: 567.282688 ± 144.595916 in #298


Epoch #322: 20001it [01:06, 299.48it/s, env_step=6440000, len=1592, n/ep=0, n/st=400, pursuer_0/loss=1.458, pursuer_1/loss=1.714, pursuer_2/loss=1.714, pursuer_3/loss=1.688, pursuer_4/loss=1.601, pursuer_5/loss=1.775, pursuer_6/loss=1.660, pursuer_7/loss=1.755, rew=520.74]                           


Epoch #322: test_reward: 431.435375 ± 168.541542, best_reward: 567.282688 ± 144.595916 in #298


Epoch #323: 20001it [01:04, 309.21it/s, env_step=6460000, len=1288, n/ep=1, n/st=400, pursuer_0/loss=1.702, pursuer_1/loss=1.618, pursuer_2/loss=1.728, pursuer_3/loss=1.878, pursuer_4/loss=1.752, pursuer_5/loss=1.755, pursuer_6/loss=1.755, pursuer_7/loss=1.922, rew=404.08]                           


Epoch #323: test_reward: 410.254313 ± 175.752987, best_reward: 567.282688 ± 144.595916 in #298


Epoch #324: 20001it [01:02, 318.56it/s, env_step=6480000, len=1456, n/ep=1, n/st=400, pursuer_0/loss=1.653, pursuer_1/loss=1.728, pursuer_2/loss=1.655, pursuer_3/loss=1.724, pursuer_4/loss=1.646, pursuer_5/loss=1.660, pursuer_6/loss=1.690, pursuer_7/loss=1.802, rew=378.76]                           


Epoch #324: test_reward: 439.478125 ± 161.739796, best_reward: 567.282688 ± 144.595916 in #298


Epoch #325: 20001it [01:04, 311.41it/s, env_step=6500000, len=992, n/ep=0, n/st=400, pursuer_0/loss=1.754, pursuer_1/loss=1.824, pursuer_2/loss=1.850, pursuer_3/loss=1.927, pursuer_4/loss=1.836, pursuer_5/loss=2.053, pursuer_6/loss=1.754, pursuer_7/loss=2.013, rew=513.76]                            


Epoch #325: test_reward: 453.142750 ± 184.681431, best_reward: 567.282688 ± 144.595916 in #298


Epoch #326: 20001it [01:03, 312.96it/s, env_step=6520000, len=1304, n/ep=0, n/st=400, pursuer_0/loss=1.724, pursuer_1/loss=1.464, pursuer_2/loss=1.629, pursuer_3/loss=1.686, pursuer_4/loss=1.654, pursuer_5/loss=1.817, pursuer_6/loss=1.759, pursuer_7/loss=1.824, rew=684.67]                           


Epoch #326: test_reward: 452.286625 ± 197.177865, best_reward: 567.282688 ± 144.595916 in #298


Epoch #327: 20001it [01:02, 318.63it/s, env_step=6540000, len=1184, n/ep=1, n/st=400, pursuer_0/loss=1.619, pursuer_1/loss=1.661, pursuer_2/loss=1.715, pursuer_3/loss=1.842, pursuer_4/loss=1.746, pursuer_5/loss=1.873, pursuer_6/loss=1.850, pursuer_7/loss=1.978, rew=430.59]                           


Epoch #327: test_reward: 434.504313 ± 155.482993, best_reward: 567.282688 ± 144.595916 in #298


Epoch #328: 20001it [01:02, 318.07it/s, env_step=6560000, len=832, n/ep=0, n/st=400, pursuer_0/loss=1.555, pursuer_1/loss=1.691, pursuer_2/loss=1.705, pursuer_3/loss=1.807, pursuer_4/loss=1.785, pursuer_5/loss=1.736, pursuer_6/loss=1.766, pursuer_7/loss=1.844, rew=479.10]                            


Epoch #328: test_reward: 468.517563 ± 141.299283, best_reward: 567.282688 ± 144.595916 in #298


Epoch #329: 20001it [00:56, 356.23it/s, env_step=6580000, len=1512, n/ep=0, n/st=400, pursuer_0/loss=1.630, pursuer_1/loss=1.797, pursuer_2/loss=1.832, pursuer_3/loss=1.863, pursuer_4/loss=1.768, pursuer_5/loss=1.885, pursuer_6/loss=1.918, pursuer_7/loss=1.873, rew=387.76]                           


Epoch #329: test_reward: 388.989813 ± 169.410444, best_reward: 567.282688 ± 144.595916 in #298


Epoch #330: 20001it [01:01, 324.06it/s, env_step=6600000, len=3776, n/ep=0, n/st=400, pursuer_0/loss=1.469, pursuer_1/loss=1.688, pursuer_2/loss=1.892, pursuer_3/loss=1.806, pursuer_4/loss=1.766, pursuer_5/loss=1.907, pursuer_6/loss=1.788, pursuer_7/loss=1.911, rew=227.10]                           


Epoch #330: test_reward: 444.896688 ± 232.490605, best_reward: 567.282688 ± 144.595916 in #298


Epoch #331: 20001it [00:57, 347.12it/s, env_step=6620000, len=1256, n/ep=1, n/st=400, pursuer_0/loss=1.626, pursuer_1/loss=1.808, pursuer_2/loss=1.726, pursuer_3/loss=1.667, pursuer_4/loss=1.791, pursuer_5/loss=1.930, pursuer_6/loss=1.868, pursuer_7/loss=1.855, rew=557.83]                           


Epoch #331: test_reward: 502.674438 ± 182.523877, best_reward: 567.282688 ± 144.595916 in #298


Epoch #332: 20001it [00:58, 344.05it/s, env_step=6640000, len=3440, n/ep=0, n/st=400, pursuer_0/loss=1.712, pursuer_1/loss=1.856, pursuer_2/loss=1.848, pursuer_3/loss=1.798, pursuer_4/loss=1.807, pursuer_5/loss=1.934, pursuer_6/loss=2.047, pursuer_7/loss=1.987, rew=316.20]                           


Epoch #332: test_reward: 398.593063 ± 196.026627, best_reward: 567.282688 ± 144.595916 in #298


Epoch #333: 20001it [00:58, 343.61it/s, env_step=6660000, len=2052, n/ep=0, n/st=400, pursuer_0/loss=1.731, pursuer_1/loss=1.753, pursuer_2/loss=1.816, pursuer_3/loss=1.770, pursuer_4/loss=1.812, pursuer_5/loss=1.949, pursuer_6/loss=1.814, pursuer_7/loss=1.862, rew=453.24]                           


Epoch #333: test_reward: 321.397813 ± 145.120243, best_reward: 567.282688 ± 144.595916 in #298


Epoch #334: 20001it [00:57, 346.00it/s, env_step=6680000, len=3080, n/ep=0, n/st=400, pursuer_0/loss=1.566, pursuer_1/loss=1.607, pursuer_2/loss=1.667, pursuer_3/loss=1.707, pursuer_4/loss=1.658, pursuer_5/loss=1.895, pursuer_6/loss=1.928, pursuer_7/loss=1.860, rew=146.97]                           


Epoch #334: test_reward: 263.781313 ± 110.788725, best_reward: 567.282688 ± 144.595916 in #298


Epoch #335: 20001it [00:56, 351.18it/s, env_step=6700000, len=3952, n/ep=0, n/st=400, pursuer_0/loss=1.935, pursuer_1/loss=1.730, pursuer_2/loss=1.764, pursuer_3/loss=2.025, pursuer_4/loss=1.957, pursuer_5/loss=1.985, pursuer_6/loss=2.201, pursuer_7/loss=2.101, rew=374.81]                           


Epoch #335: test_reward: 279.224187 ± 193.637289, best_reward: 567.282688 ± 144.595916 in #298


Epoch #336: 20001it [00:58, 344.76it/s, env_step=6720000, len=1672, n/ep=0, n/st=400, pursuer_0/loss=1.644, pursuer_1/loss=1.723, pursuer_2/loss=1.717, pursuer_3/loss=1.721, pursuer_4/loss=1.631, pursuer_5/loss=1.719, pursuer_6/loss=1.958, pursuer_7/loss=1.731, rew=356.82]                           


Epoch #336: test_reward: 381.579500 ± 216.118747, best_reward: 567.282688 ± 144.595916 in #298


Epoch #337: 20001it [00:55, 358.74it/s, env_step=6740000, len=1496, n/ep=0, n/st=400, pursuer_0/loss=1.765, pursuer_1/loss=1.612, pursuer_2/loss=1.939, pursuer_3/loss=1.829, pursuer_4/loss=1.943, pursuer_5/loss=1.807, pursuer_6/loss=1.969, pursuer_7/loss=1.833, rew=453.93]                           


Epoch #337: test_reward: 385.067125 ± 158.073703, best_reward: 567.282688 ± 144.595916 in #298


Epoch #338: 20001it [01:05, 306.04it/s, env_step=6760000, len=904, n/ep=0, n/st=400, pursuer_0/loss=1.670, pursuer_1/loss=1.637, pursuer_2/loss=1.807, pursuer_3/loss=1.548, pursuer_4/loss=1.619, pursuer_5/loss=1.698, pursuer_6/loss=2.011, pursuer_7/loss=1.789, rew=492.86]                           


Epoch #338: test_reward: 419.971250 ± 207.772319, best_reward: 567.282688 ± 144.595916 in #298


Epoch #339: 20001it [01:07, 296.91it/s, env_step=6780000, len=848, n/ep=0, n/st=400, pursuer_0/loss=1.466, pursuer_1/loss=1.476, pursuer_2/loss=1.664, pursuer_3/loss=1.615, pursuer_4/loss=1.449, pursuer_5/loss=1.773, pursuer_6/loss=1.733, pursuer_7/loss=1.787, rew=830.07]                            


Epoch #339: test_reward: 239.291750 ± 104.310921, best_reward: 567.282688 ± 144.595916 in #298


Epoch #340: 20001it [01:05, 305.86it/s, env_step=6800000, len=2160, n/ep=0, n/st=400, pursuer_0/loss=1.571, pursuer_1/loss=1.623, pursuer_2/loss=1.845, pursuer_3/loss=1.651, pursuer_4/loss=1.489, pursuer_5/loss=1.696, pursuer_6/loss=1.792, pursuer_7/loss=1.803, rew=438.09]                           


Epoch #340: test_reward: 282.156750 ± 149.171805, best_reward: 567.282688 ± 144.595916 in #298


Epoch #341: 20001it [01:02, 321.54it/s, env_step=6820000, len=1472, n/ep=0, n/st=400, pursuer_0/loss=1.411, pursuer_1/loss=1.392, pursuer_2/loss=1.561, pursuer_3/loss=1.806, pursuer_4/loss=1.463, pursuer_5/loss=1.544, pursuer_6/loss=1.680, pursuer_7/loss=1.625, rew=411.40]                           


Epoch #341: test_reward: 461.192188 ± 102.753488, best_reward: 567.282688 ± 144.595916 in #298


Epoch #342: 20001it [01:03, 317.05it/s, env_step=6840000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.530, pursuer_1/loss=1.474, pursuer_2/loss=1.500, pursuer_3/loss=1.458, pursuer_4/loss=1.521, pursuer_5/loss=1.528, pursuer_6/loss=1.578, pursuer_7/loss=1.629, rew=103.07]                           


Epoch #342: test_reward: 316.175250 ± 211.067534, best_reward: 567.282688 ± 144.595916 in #298


Epoch #343: 20001it [01:20, 247.56it/s, env_step=6860000, len=1400, n/ep=0, n/st=400, pursuer_0/loss=1.569, pursuer_1/loss=1.404, pursuer_2/loss=1.736, pursuer_3/loss=1.468, pursuer_4/loss=1.538, pursuer_5/loss=1.659, pursuer_6/loss=1.639, pursuer_7/loss=1.633, rew=503.41]                           


Epoch #343: test_reward: 290.840937 ± 177.567833, best_reward: 567.282688 ± 144.595916 in #298


Epoch #344: 20001it [01:17, 256.57it/s, env_step=6880000, len=888, n/ep=1, n/st=400, pursuer_0/loss=1.383, pursuer_1/loss=1.390, pursuer_2/loss=1.557, pursuer_3/loss=1.531, pursuer_4/loss=1.349, pursuer_5/loss=1.440, pursuer_6/loss=1.493, pursuer_7/loss=1.471, rew=559.75]                            


Epoch #344: test_reward: 382.489875 ± 226.483541, best_reward: 567.282688 ± 144.595916 in #298


Epoch #345: 20001it [01:18, 255.18it/s, env_step=6900000, len=1840, n/ep=0, n/st=400, pursuer_0/loss=1.409, pursuer_1/loss=1.500, pursuer_2/loss=1.592, pursuer_3/loss=1.580, pursuer_4/loss=1.602, pursuer_5/loss=1.640, pursuer_6/loss=1.632, pursuer_7/loss=1.718, rew=410.50]                           


Epoch #345: test_reward: 435.394063 ± 169.610263, best_reward: 567.282688 ± 144.595916 in #298


Epoch #346: 20001it [01:08, 291.19it/s, env_step=6920000, len=3752, n/ep=0, n/st=400, pursuer_0/loss=1.420, pursuer_1/loss=1.385, pursuer_2/loss=1.440, pursuer_3/loss=1.642, pursuer_4/loss=1.390, pursuer_5/loss=1.594, pursuer_6/loss=1.777, pursuer_7/loss=1.496, rew=149.50]                           


Epoch #346: test_reward: 328.504875 ± 170.026046, best_reward: 567.282688 ± 144.595916 in #298


Epoch #347: 20001it [01:02, 318.11it/s, env_step=6940000, len=1576, n/ep=0, n/st=400, pursuer_0/loss=1.424, pursuer_1/loss=1.441, pursuer_2/loss=1.440, pursuer_3/loss=1.532, pursuer_4/loss=1.470, pursuer_5/loss=1.480, pursuer_6/loss=1.490, pursuer_7/loss=1.546, rew=351.94]                           


Epoch #347: test_reward: 506.253625 ± 173.561745, best_reward: 567.282688 ± 144.595916 in #298


Epoch #348: 20001it [01:03, 313.89it/s, env_step=6960000, len=976, n/ep=0, n/st=400, pursuer_0/loss=1.306, pursuer_1/loss=1.361, pursuer_2/loss=1.454, pursuer_3/loss=1.538, pursuer_4/loss=1.475, pursuer_5/loss=1.627, pursuer_6/loss=1.597, pursuer_7/loss=1.580, rew=686.24]                            


Epoch #348: test_reward: 430.065000 ± 165.500282, best_reward: 567.282688 ± 144.595916 in #298


Epoch #349: 20001it [01:05, 307.31it/s, env_step=6980000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.289, pursuer_1/loss=1.386, pursuer_2/loss=1.516, pursuer_3/loss=1.537, pursuer_4/loss=1.454, pursuer_5/loss=1.615, pursuer_6/loss=1.549, pursuer_7/loss=1.487, rew=334.35]                           


Epoch #349: test_reward: 232.751062 ± 198.072139, best_reward: 567.282688 ± 144.595916 in #298


Epoch #350: 20001it [01:06, 302.81it/s, env_step=7000000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.429, pursuer_1/loss=1.301, pursuer_2/loss=1.317, pursuer_3/loss=1.500, pursuer_4/loss=1.538, pursuer_5/loss=1.540, pursuer_6/loss=1.611, pursuer_7/loss=1.669, rew=159.48]                           


Epoch #350: test_reward: 408.851813 ± 206.383969, best_reward: 567.282688 ± 144.595916 in #298


Epoch #351: 20001it [01:04, 310.69it/s, env_step=7020000, len=2728, n/ep=0, n/st=400, pursuer_0/loss=1.354, pursuer_1/loss=1.403, pursuer_2/loss=1.546, pursuer_3/loss=1.578, pursuer_4/loss=1.495, pursuer_5/loss=1.635, pursuer_6/loss=1.482, pursuer_7/loss=1.680, rew=287.63]                           


Epoch #351: test_reward: 459.410000 ± 135.638673, best_reward: 567.282688 ± 144.595916 in #298


Epoch #352: 20001it [01:18, 255.20it/s, env_step=7040000, len=1272, n/ep=0, n/st=400, pursuer_0/loss=1.609, pursuer_1/loss=1.426, pursuer_2/loss=1.359, pursuer_3/loss=1.605, pursuer_4/loss=1.457, pursuer_5/loss=1.505, pursuer_6/loss=1.556, pursuer_7/loss=1.733, rew=607.32]                           


Epoch #352: test_reward: 337.263750 ± 184.572209, best_reward: 567.282688 ± 144.595916 in #298


Epoch #353: 20001it [01:05, 305.85it/s, env_step=7060000, len=2672, n/ep=0, n/st=400, pursuer_0/loss=1.480, pursuer_1/loss=1.321, pursuer_2/loss=1.559, pursuer_3/loss=1.612, pursuer_4/loss=1.633, pursuer_5/loss=1.736, pursuer_6/loss=1.676, pursuer_7/loss=1.632, rew=462.40]                           


Epoch #353: test_reward: 300.938375 ± 203.979661, best_reward: 567.282688 ± 144.595916 in #298


Epoch #354: 20001it [00:58, 342.79it/s, env_step=7080000, len=1160, n/ep=0, n/st=400, pursuer_0/loss=1.386, pursuer_1/loss=1.464, pursuer_2/loss=1.516, pursuer_3/loss=1.576, pursuer_4/loss=1.566, pursuer_5/loss=1.616, pursuer_6/loss=1.835, pursuer_7/loss=1.787, rew=418.11]                           


Epoch #354: test_reward: 329.738250 ± 176.775425, best_reward: 567.282688 ± 144.595916 in #298


Epoch #355: 20001it [01:00, 332.53it/s, env_step=7100000, len=1768, n/ep=0, n/st=400, pursuer_0/loss=1.295, pursuer_1/loss=1.449, pursuer_2/loss=1.520, pursuer_3/loss=1.683, pursuer_4/loss=1.518, pursuer_5/loss=1.591, pursuer_6/loss=1.628, pursuer_7/loss=1.614, rew=567.16]                           


Epoch #355: test_reward: 497.879750 ± 208.709530, best_reward: 567.282688 ± 144.595916 in #298


Epoch #356: 20001it [01:00, 331.81it/s, env_step=7120000, len=1720, n/ep=0, n/st=400, pursuer_0/loss=1.516, pursuer_1/loss=1.525, pursuer_2/loss=1.381, pursuer_3/loss=1.692, pursuer_4/loss=1.509, pursuer_5/loss=1.625, pursuer_6/loss=1.504, pursuer_7/loss=1.537, rew=366.49]                           


Epoch #356: test_reward: 536.496563 ± 144.868041, best_reward: 567.282688 ± 144.595916 in #298


Epoch #357: 20001it [01:00, 331.70it/s, env_step=7140000, len=2320, n/ep=0, n/st=400, pursuer_0/loss=1.572, pursuer_1/loss=1.502, pursuer_2/loss=1.514, pursuer_3/loss=1.555, pursuer_4/loss=1.539, pursuer_5/loss=1.724, pursuer_6/loss=1.736, pursuer_7/loss=1.683, rew=422.24]                           


Epoch #357: test_reward: 459.943750 ± 208.863306, best_reward: 567.282688 ± 144.595916 in #298


Epoch #358: 20001it [01:00, 332.09it/s, env_step=7160000, len=1548, n/ep=0, n/st=400, pursuer_0/loss=1.458, pursuer_1/loss=1.507, pursuer_2/loss=1.548, pursuer_3/loss=1.656, pursuer_4/loss=1.543, pursuer_5/loss=1.743, pursuer_6/loss=1.640, pursuer_7/loss=1.629, rew=562.22]                           


Epoch #358: test_reward: 225.290750 ± 155.970279, best_reward: 567.282688 ± 144.595916 in #298


Epoch #359: 20001it [01:00, 330.44it/s, env_step=7180000, len=1144, n/ep=0, n/st=400, pursuer_0/loss=1.492, pursuer_1/loss=1.469, pursuer_2/loss=1.623, pursuer_3/loss=1.626, pursuer_4/loss=1.658, pursuer_5/loss=1.610, pursuer_6/loss=1.688, pursuer_7/loss=1.760, rew=490.33]                           


Epoch #359: test_reward: 445.115125 ± 150.099835, best_reward: 567.282688 ± 144.595916 in #298


Epoch #360: 20001it [01:00, 329.49it/s, env_step=7200000, len=3752, n/ep=0, n/st=400, pursuer_0/loss=1.689, pursuer_1/loss=1.695, pursuer_2/loss=1.696, pursuer_3/loss=1.640, pursuer_4/loss=1.755, pursuer_5/loss=1.814, pursuer_6/loss=1.653, pursuer_7/loss=1.934, rew=185.18]                           


Epoch #360: test_reward: 507.245500 ± 128.162414, best_reward: 567.282688 ± 144.595916 in #298


Epoch #361: 20001it [00:59, 333.90it/s, env_step=7220000, len=3280, n/ep=0, n/st=400, pursuer_0/loss=1.653, pursuer_1/loss=1.377, pursuer_2/loss=1.493, pursuer_3/loss=1.656, pursuer_4/loss=1.529, pursuer_5/loss=1.600, pursuer_6/loss=1.572, pursuer_7/loss=1.632, rew=256.25]                           


Epoch #361: test_reward: 476.149500 ± 191.478654, best_reward: 567.282688 ± 144.595916 in #298


Epoch #362: 20001it [01:00, 329.77it/s, env_step=7240000, len=1824, n/ep=0, n/st=400, pursuer_0/loss=1.502, pursuer_1/loss=1.601, pursuer_2/loss=1.701, pursuer_3/loss=1.740, pursuer_4/loss=1.612, pursuer_5/loss=1.755, pursuer_6/loss=1.719, pursuer_7/loss=1.827, rew=386.98]                           


Epoch #362: test_reward: 385.582875 ± 221.522022, best_reward: 567.282688 ± 144.595916 in #298


Epoch #363: 20001it [00:59, 338.91it/s, env_step=7260000, len=936, n/ep=0, n/st=400, pursuer_0/loss=1.430, pursuer_1/loss=1.666, pursuer_2/loss=1.500, pursuer_3/loss=1.534, pursuer_4/loss=1.530, pursuer_5/loss=1.767, pursuer_6/loss=1.752, pursuer_7/loss=1.596, rew=490.40]                            


Epoch #363: test_reward: 266.501625 ± 218.655466, best_reward: 567.282688 ± 144.595916 in #298


Epoch #364: 20001it [00:58, 339.82it/s, env_step=7280000, len=952, n/ep=0, n/st=400, pursuer_0/loss=1.498, pursuer_1/loss=1.520, pursuer_2/loss=1.664, pursuer_3/loss=1.705, pursuer_4/loss=1.690, pursuer_5/loss=1.721, pursuer_6/loss=1.730, pursuer_7/loss=1.867, rew=537.84]                            


Epoch #364: test_reward: 276.631563 ± 276.789452, best_reward: 567.282688 ± 144.595916 in #298


Epoch #365: 20001it [00:58, 343.83it/s, env_step=7300000, len=3400, n/ep=0, n/st=400, pursuer_0/loss=1.551, pursuer_1/loss=1.518, pursuer_2/loss=1.582, pursuer_3/loss=1.866, pursuer_4/loss=1.623, pursuer_5/loss=1.745, pursuer_6/loss=1.720, pursuer_7/loss=1.733, rew=254.63]                           


Epoch #365: test_reward: 485.020312 ± 128.161377, best_reward: 567.282688 ± 144.595916 in #298


Epoch #366: 20001it [01:19, 251.93it/s, env_step=7320000, len=1680, n/ep=0, n/st=400, pursuer_0/loss=1.496, pursuer_1/loss=1.584, pursuer_2/loss=1.635, pursuer_3/loss=1.765, pursuer_4/loss=1.680, pursuer_5/loss=1.587, pursuer_6/loss=1.906, pursuer_7/loss=1.738, rew=595.43]                           


Epoch #366: test_reward: 327.856125 ± 226.649955, best_reward: 567.282688 ± 144.595916 in #298


Epoch #367: 20001it [01:39, 201.61it/s, env_step=7340000, len=936, n/ep=0, n/st=400, pursuer_0/loss=1.375, pursuer_1/loss=1.562, pursuer_2/loss=1.694, pursuer_3/loss=1.775, pursuer_4/loss=1.682, pursuer_5/loss=1.892, pursuer_6/loss=1.657, pursuer_7/loss=1.745, rew=420.20]                            


Epoch #367: test_reward: 442.856813 ± 159.051396, best_reward: 567.282688 ± 144.595916 in #298


Epoch #368: 20001it [00:56, 353.49it/s, env_step=7360000, len=1360, n/ep=0, n/st=400, pursuer_0/loss=1.480, pursuer_1/loss=1.628, pursuer_2/loss=1.483, pursuer_3/loss=1.551, pursuer_4/loss=1.603, pursuer_5/loss=1.586, pursuer_6/loss=1.473, pursuer_7/loss=1.754, rew=573.14]                           


Epoch #368: test_reward: 389.079000 ± 196.061036, best_reward: 567.282688 ± 144.595916 in #298


Epoch #369: 20001it [00:58, 343.78it/s, env_step=7380000, len=3064, n/ep=0, n/st=400, pursuer_0/loss=1.528, pursuer_1/loss=1.528, pursuer_2/loss=1.679, pursuer_3/loss=1.655, pursuer_4/loss=1.578, pursuer_5/loss=1.727, pursuer_6/loss=1.850, pursuer_7/loss=1.840, rew=289.09]                           


Epoch #369: test_reward: 367.659937 ± 214.303029, best_reward: 567.282688 ± 144.595916 in #298


Epoch #370: 20001it [00:59, 336.72it/s, env_step=7400000, len=1776, n/ep=0, n/st=400, pursuer_0/loss=1.492, pursuer_1/loss=1.687, pursuer_2/loss=1.573, pursuer_3/loss=1.841, pursuer_4/loss=1.450, pursuer_5/loss=1.804, pursuer_6/loss=1.665, pursuer_7/loss=1.814, rew=382.45]                           


Epoch #370: test_reward: 197.087937 ± 175.238803, best_reward: 567.282688 ± 144.595916 in #298


Epoch #371: 20001it [00:57, 349.08it/s, env_step=7420000, len=4000, n/ep=0, n/st=400, pursuer_0/loss=1.314, pursuer_1/loss=1.496, pursuer_2/loss=1.491, pursuer_3/loss=1.759, pursuer_4/loss=1.345, pursuer_5/loss=1.685, pursuer_6/loss=1.606, pursuer_7/loss=1.590, rew=71.27]                            


Epoch #371: test_reward: 270.570187 ± 177.922711, best_reward: 567.282688 ± 144.595916 in #298


Epoch #372: 20001it [00:59, 338.75it/s, env_step=7440000, len=2616, n/ep=0, n/st=400, pursuer_0/loss=1.410, pursuer_1/loss=1.555, pursuer_2/loss=1.589, pursuer_3/loss=1.643, pursuer_4/loss=1.455, pursuer_5/loss=1.532, pursuer_6/loss=1.682, pursuer_7/loss=1.634, rew=393.40]                           


Epoch #372: test_reward: 458.098500 ± 165.884837, best_reward: 567.282688 ± 144.595916 in #298


Epoch #373:  86%|########6 | 17200/20000 [00:49<00:08, 339.56it/s, env_step=7456800, len=1416, n/ep=0, n/st=400, pursuer_0/loss=1.417, pursuer_1/loss=1.453, pursuer_2/loss=1.598, pursuer_3/loss=1.479, pursuer_4/loss=1.642, pursuer_5/loss=1.557, pursuer_6/loss=1.758, pursuer_7/loss=1.718, rew=367.84]

In [None]:

from typing import Optional, Tuple
import os
import numpy as np
import torch
from tianshou.env import DummyVectorEnv

import torch

from pettingzoo.sisl import pursuit_v4
from TaskAllocation.RL_Policies.MultiHead_SISL import MultiHead_SISL

import os
import datetime
from typing import Optional, Tuple

import numpy as np
import torch
from tianshou.data import Collector, VectorReplayBuffer, PrioritizedVectorReplayBuffer
from tianshou.env import DummyVectorEnv
from tianshou.env.pettingzoo_env import PettingZooEnv
from tianshou.policy import BasePolicy, DQNPolicy, MultiAgentPolicyManager, RandomPolicy, RainbowPolicy
from tianshou.trainer import OffpolicyTrainer
from torch.utils.tensorboard import SummaryWriter
from tianshou.utils import TensorboardLogger
from TaskAllocation.RL_Policies.CNN_SISL import CNN_SISL

from pettingzoo.sisl import pursuit_v4

from TaskAllocation.RL_Policies.MultiHead_SISL import MultiHead_SISL
from TaskAllocation.RL_Policies.DNN_SISL import DNN_SISL


from TaskAllocation.RL_Policies.Custom_Classes import CustomCollector

# ---------------------------------------------------------------------------
# Run identity
# ---------------------------------------------------------------------------
model = "CNN_SISL"  # alternative architecture: "MultiHead_SISL"
test_num = "_SISL_NOV12_Emb128"
policyModel = "DQN"
name = model + test_num

# Number of parallel environments used for training / testing.
train_env_num = 10
test_env_num = 10

# ---------------------------------------------------------------------------
# Checkpoint naming
# ---------------------------------------------------------------------------
load_policy_name = 'policy_CNN_SISL_SISL_NOV12_Emb128_last.pth'
save_policy_name = f'policy_{name}'
policy_path = "dqn_Custom"

same_policy = True
load_model = False

# ---------------------------------------------------------------------------
# Environment geometry (read by _get_env_eval)
# ---------------------------------------------------------------------------
size = [16, 16]
n_agents = 8
n_targes = 30
max_cycles = 500
obs_range = 7

# ---------------------------------------------------------------------------
# Logging: each run gets its own timestamped directory
# ---------------------------------------------------------------------------
now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
log_name = f"{name}{now}"
log_path = os.path.join('./', "Logs", "dqn", log_name)

# ---------------------------------------------------------------------------
# Hyper-parameters
# ---------------------------------------------------------------------------
dqn_params = {
    "discount_factor": 0.99,
    "estimation_step": 1,
    "target_update_freq": 800 * 4,
    "optminizer": "Adam",
    "lr": 1e-4,
}

trainer_params = {
    "max_epoch": 500,
    "step_per_epoch": 10 * (100 * 10 * 4),
    "step_per_collect": 20 * (10 * 4),
    "episode_per_test": 10,
    "batch_size": 4 * 32,
    # Only runs once a collect finishes (repeated as many times as needed to
    # meet this ratio).
    "update_per_step": 1 / 20,
    "tn_eps_max": 0.95,
    "ts_eps_max": 0.0,
}

# Human-readable run summary. The trailing double spaces are deliberate
# (Markdown hard line breaks), so every line is spelled out explicitly.
Run_Data = "\n".join([
    f"{name}  ",
    f'Loaded_Model: {load_policy_name if load_model else "no"}  ',
    f"log_path: {log_path}  ",
    f"train/test_env_num: {train_env_num} / {test_env_num}  ",
    f"model: {model}  ",
    f"dqn_params: {dqn_params}  ",
    f"trainer_params: {trainer_params} ",
    f"single_policy: {same_policy}",
    "",
    "--------- Env ------------  ",
    "",
])
# Rewards Only Final Quality and SQuality
# F_Rew / 20 > lre
# NOTE: an earlier version of this summary also listed fields of a
# `config_default` object under the "Env" header (random_init_pos,
# max_time_steps, simulation_frame_rate, agents, tasks, threats_list,
# fixed_seed); no such object is defined in this cell, so they stay out.

# ---------------------------------------------------------------------------
# Filesystem layout
# ---------------------------------------------------------------------------
model_load_path = os.path.join(policy_path, load_policy_name)
model_save_path = os.path.join(policy_path, save_policy_name)
for _required_dir in (policy_path, log_path):
    os.makedirs(_required_dir, exist_ok=True)

def _get_env_eval():
    """Create one evaluation environment for the vectorised env wrapper.

    Takes no arguments so that the function object itself can be passed as
    an environment factory to DummyVectorEnv. The grid size, evader count,
    pursuer count and observation range are read from the module-level
    ``size``, ``n_targes``, ``n_agents`` and ``obs_range``; every other
    setting is fixed here.

    Returns:
        A ``PettingZooEnv`` wrapping a freshly built ``pursuit_v4`` env.
    """
    pursuit_settings = {
        # Fixed at 500 here; the module-level `max_cycles` is not consulted.
        "max_cycles": 500,
        "x_size": size[0],
        "y_size": size[1],
        "shared_reward": True,
        "n_evaders": n_targes,
        "n_pursuers": n_agents,
        "obs_range": obs_range,
        "n_catch": 2,
        "freeze_evaders": False,
        "tag_reward": 0.01,
        "catch_reward": 5.0,
        "urgency_reward": -0.1,
        "surround": True,
        "constraint_window": 1.0,
        # Headless by default; switch to "human" to watch the rollout.
        "render_mode": None,
    }
    raw_env = pursuit_v4.env(**pursuit_settings)
    return PettingZooEnv(raw_env)

def _get_agents(
    agent_learn: Optional[BasePolicy] = None,
    agent_opponent: Optional[BasePolicy] = None,
    optim: Optional[torch.optim.Optimizer] = None,
    policy_load_path = None,
) -> Tuple[BasePolicy, torch.optim.Optimizer, list]:
    """Build the multi-agent policy manager used for training/evaluation.

    A probe environment is created via ``_get_env_eval`` to read the
    observation/action spaces and the agent ids. When ``agent_learn`` is not
    supplied, a network is built according to the module-level ``model`` and
    wrapped in the policy class selected by the module-level ``policyModel``.
    The same policy object is then assigned to every agent.

    Args:
        agent_learn: Pre-built policy to share across all agents. When None,
            a new one is constructed.
        agent_opponent: Unused; kept for signature compatibility.
        optim: Optimizer for a newly built network. When None, an Adam
            optimizer using ``dqn_params["lr"]`` is created. Ignored (and
            returned unchanged) when ``agent_learn`` is supplied.
        policy_load_path: Optional checkpoint path that overrides the
            module-level ``model_load_path``. Only consulted when the
            module-level ``load_model`` flag is set and a new policy is built.

    Returns:
        ``(policy_manager, optim, agent_ids)`` where ``agent_ids`` is
        ``env.agents`` of the probe environment.

    Raises:
        ValueError: If ``model`` or ``policyModel`` names an unsupported
            option.
    """
    env = _get_env_eval()
    agent_observation_space = env.observation_space
    action_shape = env.action_space

    device = "cuda" if torch.cuda.is_available() else "cpu"

    if agent_learn is None:
        # action_shape=5 is hard-coded for all networks -- presumably the
        # size of pursuit_v4's discrete action set; verify if the env changes.
        if model == "MultiHead_SISL":
            net = MultiHead_SISL(
                obs_shape=agent_observation_space,
                action_shape=5,
                device=device,
            ).to(device)
        elif model == "DNN_SISL":
            net = DNN_SISL(
                obs_shape=agent_observation_space,
                action_shape=5,
                device=device,
            ).to(device)
        elif model == "CNN_SISL":
            # Unlike the other two networks, the CNN receives the space's
            # shape rather than the space object itself.
            net = CNN_SISL(
                obs_shape=agent_observation_space.shape,
                action_shape=5,
                device=device,
            ).to(device)
        else:
            raise ValueError(f"Unsupported model: {model!r}")

        if optim is None:
            optim = torch.optim.Adam(
                net.parameters(),
                lr=dqn_params["lr"],
                weight_decay=0.0,
                amsgrad=True,
            )

        if policyModel == "DQN":
            agent_learn = DQNPolicy(
                model=net,
                optim=optim,
                action_space=action_shape,
                discount_factor=dqn_params["discount_factor"],
                estimation_step=dqn_params["estimation_step"],
                target_update_freq=dqn_params["target_update_freq"],
                reward_normalization=False,
                clip_loss_grad=False,
            )
        elif policyModel == "Rainbow":
            agent_learn = RainbowPolicy(
                model=net,
                optim=optim,
                num_atoms=31,
                discount_factor=dqn_params["discount_factor"],
                estimation_step=dqn_params["estimation_step"],
                target_update_freq=dqn_params["target_update_freq"],
            )
        else:
            raise ValueError(f"Unsupported policyModel: {policyModel!r}")

        if load_model:
            # An explicit path wins over the module-level default.
            checkpoint_path = (
                policy_load_path if policy_load_path is not None else model_load_path
            )
            # map_location lets a checkpoint saved on GPU be restored on a
            # CPU-only machine (and vice versa).
            agent_learn.load_state_dict(
                torch.load(checkpoint_path, map_location=device)
            )
            print(f'Loaded-> {checkpoint_path}')

    # Built outside the branch above so a caller-supplied policy also works.
    # Every agent references the same policy object (shared parameters).
    agents = [agent_learn for _ in range(len(env.agents))]

    policy = MultiAgentPolicyManager(policies=agents, env=env)

    return policy, optim, env.agents


# Evaluation cell: rebuild the policy with the same architecture as the saved
# one, restore the trained weights, and roll out a single episode.

# NOTE(review): these three assignments overwrite the run-level `name`,
# `load_policy_name` and `log_path`. `load_policy_name` ends up as
# "policy_policy_CNN_SISL_SISL_NOV12_Emb128.pth" (doubled prefix) and none of
# the three is read again in this cell. They are kept only so notebook state
# stays the same -- confirm and remove if nothing else relies on them.
name = 'policy_CNN_SISL_SISL_NOV12_Emb128.pth'
load_policy_name = f'policy_{name}'
log_path = os.path.join('./', "Logs", "dqn", name)

policy, optim, _ = _get_agents()
model_save_path = os.path.join("dqn_SISL", save_policy_name)

# Load the saved checkpoint. _get_agents puts one shared policy object under
# every agent, so restoring through 'pursuer_0' is expected to update all of
# them. map_location lets a checkpoint written during GPU training load on a
# CPU-only machine instead of raising a CUDA deserialization error.
policy_test = policy.policies['pursuer_0']
checkpoint_device = "cuda" if torch.cuda.is_available() else "cpu"
policy_test.load_state_dict(
    torch.load(model_save_path + ".pth", map_location=checkpoint_device)
)

envs = DummyVectorEnv([_get_env_eval for _ in range(1)])
# Keep a small amount of epsilon-greedy exploration during the rollout;
# it is applied because the collector runs with exploration_noise=True.
policy_test.set_eps(0.05)

collector = CustomCollector(policy, envs, exploration_noise=True)

# Pass render=0.1 to watch the episode (requires a render_mode in the env).
results = collector.collect(n_episode=1)
results

In [None]:
# Summarise the collected rewards, ignoring entries at or below -10.
kept_rewards = results['rews'][results['rews'] > -10]
print(np.mean(kept_rewards))

# Plot a histogram of the same filtered rewards.
import matplotlib.pyplot as plt
plt.hist(kept_rewards, bins=100)
plt.show()
