In [1]:
import os
import datetime
from typing import Optional, Tuple
import json


os.environ["WANDB_NOTEBOOK_NAME"] = "Tianshow_Centralized_Training"

import numpy as np
import torch
from tianshou.data import Collector, VectorReplayBuffer, PrioritizedVectorReplayBuffer
from tianshou.env import DummyVectorEnv
from tianshou.env.pettingzoo_env import PettingZooEnv
from tianshou.policy import BasePolicy, DQNPolicy, MultiAgentPolicyManager, RandomPolicy, RainbowPolicy
from tianshou.trainer import OffpolicyTrainer
from torch.utils.tensorboard import SummaryWriter

# from pettingzoo.sisl import pursuit_v4

from TaskAllocation.RL_Policies.MultiHead_SISL import MultiHead_SISL
from TaskAllocation.RL_Policies.DNN_SISL import DNN_SISL
from TaskAllocation.RL_Policies.CNN_SISL import CNN_SISL
from TaskAllocation.RL_Policies.CNN_ATT_SISL import CNN_ATT_SISL
from TaskAllocation.RL_Policies.SISL_Task_MultiHead import SISL_Task_MultiHead


from Mods.MemoryBuffer import StateMemoryVectorReplayBuffer
from Mods.MemoryBuffer import MemoryOffpolicyTrainer
import Mods.MemPursuitEnv as MemPursuitEnv
from Mods.OffPolicyTrainerMod import OffPolicyTrainerMod

import Mods.TaskPursuitEnv as TaskPursuitEnv

from TaskAllocation.RL_Policies.Custom_Classes import CustomNet
from TaskAllocation.RL_Policies.Custom_Classes import CustomCollector
from TaskAllocation.RL_Policies.Custom_Classes import CustomParallelToAECWrapper

# Add specific modification to tianshou
import wandb
from tianshou.utils import WandbLogger
from tianshou.utils.logger.base import LOG_DATA_TYPE

def new_write(self, step_type: str, step: int, data: LOG_DATA_TYPE) -> None:
    """Replacement for ``WandbLogger.write`` that forwards ``data`` to wandb.

    The step counter is stored in ``data`` under the ``step_type`` key (the
    caller's mapping is modified in place) and the whole mapping is then
    handed to ``wandb.log``.
    """
    data.update({step_type: step})
    wandb.log(data)


# Install the replacement on the class so every WandbLogger instance uses it.
WandbLogger.write = new_write

# ---------------------------------------------------------------------------
# Experiment identification
# ---------------------------------------------------------------------------
# `model` selects the network class instantiated in _get_agents; the other
# values handled there are "MultiHead_SISL", "DNN_SISL", "CNN_SISL" and
# "CNN_ATT_SISL".
model = "SISL_Task_MultiHead"
test_num = "_Desk_NewExpFix_noActHist"
policyModel = "DQN"  # the only value for which _get_agents builds a policy

# Number of environments in the training / evaluation vector envs.
train_env_num = 10
test_env_num = 10

name = model + test_num

# Log location: the run name is suffixed with the start timestamp.
now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
log_name = name + str(now)
log_path = os.path.join('./', "Logs", "dqn_sisl", log_name)

# Policy checkpoints (file to load from / file-name prefix to save under).
load_policy_name = 'policy_SISL_Task_MultiHead_Desk_NewExpCor231219-173711_44.pth'
save_policy_name = f'policy_{log_name}'
policy_path = "dqn_SISL"

Policy_Config = {
    "same_policy": True,   # every agent shares one policy instance (see _get_agents)
    "load_model": False,   # load weights from model_load_path before training
    "freeze_CNN": False,   # freeze conv layers; optimize only policy_fn / value_fn
}

SISL_Config = {
    "max_cycles": 500,         # default: 500
    "x_size": 16,              # default: 16
    "y_size": 16,              # default: 16
    "shared_reward": False,    # default: True
    "n_evaders": 30,           # default: 30
    "n_pursuers": 8,           # default: 10
    "obs_range": 7,            # default: 7
    "n_catch": 2,              # default: 2
    "freeze_evaders": False,   # default: False
    "tag_reward": 0.01,        # default: 0.01
    "catch_reward": 5.0,       # default: 5.0
    "urgency_reward": -0.1,    # default: -0.1
    "surround": True,          # default: True
    "constraint_window": 1.0,  # default: 1.0
    ###---- Additional Config ----###
    # "att_memory" : False,
    # "max_tasks" : 10
}

max_cycles = SISL_Config["max_cycles"]
n_agents = SISL_Config["n_pursuers"]

dqn_params = {
    "discount_factor": 0.99,
    "estimation_step": 30,
    "target_update_freq": 2400,  # max_cycles * n_agents
    # NOTE: key spelling kept as-is; it is not read anywhere in the visible
    # code and only ends up in the logged run configuration.
    "optminizer": "Adam",
    "lr": 0.00001,
}

trainer_params = {
    "max_epoch": 200,
    "step_per_epoch": 20000,  # 5 * (150 * n_agents)
    "step_per_collect": 800,  # * (10 * n_agents)
    "episode_per_test": 20,
    "batch_size": 128 * n_agents,
    # Only run after close a Collect (run many times as necessary to meet the value)
    "update_per_step": 1 / 80,
    "tn_eps_max": 0.15,  # starting epsilon during training (see train_fn)
    "ts_eps_max": 0.0,   # epsilon used during evaluation (see test_fn)
    "warmup_size": 1,
}

# Flat view of every setting, used as the logged run configuration. It is
# built as a NEW dict: the previous `runConfig = dqn_params` aliased the two
# names, so the updates below leaked every other setting into dqn_params.
runConfig = {**dqn_params, **Policy_Config, **trainer_params, **SISL_Config}

model_load_path = os.path.join(policy_path, load_policy_name)
model_save_path = os.path.join(policy_path, save_policy_name)

# Make sure the checkpoint and log directories exist before training starts.
os.makedirs(policy_path, exist_ok=True)
os.makedirs(log_path, exist_ok=True)

def _get_agents(
    agent_learn: Optional[BasePolicy] = None,
    agent_opponent: Optional[BasePolicy] = None,
    optim: Optional[torch.optim.Optimizer] = None,
    policy_load_path = None,
) -> Tuple[BasePolicy, torch.optim.Optimizer, list]:
    """Build the multi-agent policy used for training.

    One network / optimizer / policy triple is created per distinct policy
    (a single one when ``Policy_Config["same_policy"]`` is set, otherwise
    four). The policies are then assigned to the environment's agents and
    wrapped in a ``MultiAgentPolicyManager``.

    Args:
        agent_learn: Policy used when ``policyModel`` is not ``"DQN"``; it is
            replaced by a freshly built ``DQNPolicy`` otherwise.
        agent_opponent: Unused; kept for interface compatibility.
        optim: Ignored; an Adam optimizer is always created here.
        policy_load_path: Checkpoint loaded when
            ``Policy_Config["load_model"]`` is true. Defaults to the
            module-level ``model_load_path``.

    Returns:
        ``(policy, optim, agent_names)``: the policy manager, the optimizer
        of the last policy that was built, and ``env.agents``.

    Raises:
        ValueError: If ``model`` does not name a known network, or if
            ``policyModel`` is not ``"DQN"`` and no ``agent_learn`` was given.
    """
    env = _get_env()
    agent_observation_space = env.observation_space
    action_shape = env.action_space

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Honour the explicit argument; fall back to the configured checkpoint.
    checkpoint_path = model_load_path if policy_load_path is None else policy_load_path

    # With separate policies only four are built; any remaining agents reuse
    # the first one (see the padding step after the loop).
    policies_number = 1 if Policy_Config["same_policy"] else 4  # len(env.agents)

    agents = []
    for _ in range(policies_number):

        if model == "MultiHead_SISL":
            net = MultiHead_SISL(
                obs_shape=agent_observation_space,
                num_tasks=5,
                hidden_sizes=32,
                device=device,
            ).to(device)
        elif model == "SISL_Task_MultiHead":
            net = SISL_Task_MultiHead(
                num_tasks=20,
                num_features_per_task=9,
                device=device,
            ).to(device)
        elif model == "DNN_SISL":
            net = DNN_SISL(
                obs_shape=agent_observation_space,
                action_shape=5,
                device=device,
            ).to(device)
        elif model == "CNN_SISL":
            net = CNN_SISL(
                obs_shape=agent_observation_space.shape,
                action_shape=5,
                device=device,
            ).to(device)
        elif model == "CNN_ATT_SISL":
            net = CNN_ATT_SISL(
                obs_shape=agent_observation_space.shape,
                action_shape=5,
                device=device,
            ).to(device)
        else:
            # Previously this fell through and crashed later on an unbound `net`.
            raise ValueError(f"Unknown model: {model!r}")

        if Policy_Config["freeze_CNN"]:
            net.freeze_conv_layers()  # Freeze the convolutional layers

            # Only the heads stay trainable, so only their parameters are optimized.
            optim = torch.optim.Adam(
                list(net.policy_fn.parameters()) + list(net.value_fn.parameters()),
                lr=dqn_params["lr"],
            )
        else:
            optim = torch.optim.Adam(
                net.parameters(), lr=dqn_params["lr"], weight_decay=0.0, amsgrad=True
            )

        if policyModel == "DQN":
            agent_learn = DQNPolicy(
                model=net,
                optim=optim,
                action_space=action_shape,
                discount_factor=dqn_params["discount_factor"],
                estimation_step=dqn_params["estimation_step"],
                target_update_freq=dqn_params["target_update_freq"],
                reward_normalization=False,
                clip_loss_grad=False,
            )
        elif agent_learn is None:
            # No policy was built and none was supplied by the caller.
            raise ValueError(f"Unsupported policyModel: {policyModel!r}")

        if Policy_Config["load_model"] is True:
            # Load the saved checkpoint; map_location lets a checkpoint saved
            # on GPU be restored on a CPU-only machine.
            agent_learn.load_state_dict(torch.load(checkpoint_path, map_location=device))
            print(f'Loaded-> {checkpoint_path}')

        agents.append(agent_learn)

    n_env_agents = len(env.agents)
    if Policy_Config["same_policy"]:
        # Every agent references the very same policy object.
        agents = [agents[0]] * n_env_agents
    else:
        # Pad with the first policy so there is one entry per environment agent.
        agents.extend(agents[0] for _ in range(n_env_agents - policies_number))

    policy = MultiAgentPolicyManager(policies=agents, env=env)

    return policy, optim, env.agents

def _get_env(test=False):
    """Build one pursuit environment wrapped for tianshou.

    This function is needed to provide callables for DummyVectorEnv.

    Args:
        test: Accepted for interface compatibility. Training and evaluation
            environments are configured identically, so the flag has no
            effect on the environment that is returned.

    Returns:
        A ``PettingZooEnv`` wrapping ``TaskPursuitEnv.env(...)`` configured
        from ``SISL_Config``.
    """
    # Only these keys are forwarded; SISL_Config may hold additional entries.
    forwarded_keys = (
        "max_cycles",
        "x_size",
        "y_size",
        "shared_reward",
        "n_evaders",
        "n_pursuers",
        "obs_range",
        "n_catch",
        "freeze_evaders",
        "tag_reward",
        "catch_reward",
        "urgency_reward",
        "surround",
        "constraint_window",
        # "att_memory",
    )

    env = TaskPursuitEnv.env(
        **{key: SISL_Config[key] for key in forwarded_keys},
        render_mode=None,  # "human" was the alternative noted by the original author
    )

    return PettingZooEnv(env)

# print(json.dumps(runConfig, indent=4))


In [2]:
if __name__ == "__main__":
    # Entry point of the training run: build the vectorized environments, the
    # policies, the collectors and the wandb/tensorboard logging, then hand
    # everything to tianshou's OffpolicyTrainer.

    torch.set_grad_enabled(True) 

    # ======== Step 1: Environment setup =========
    # Both vector envs are built from the same factory (_get_env with its
    # default argument).
    train_envs = DummyVectorEnv([_get_env for _ in range(train_env_num)])
    test_envs = DummyVectorEnv([_get_env for _ in range(test_env_num)]) 

    # Seed numpy, torch and both vectorized environments with the same value.
    seed = 0
    np.random.seed(seed)
    
    torch.manual_seed(seed)

    train_envs.seed(seed)
    test_envs.seed(seed)

    # ======== Step 2: Agent setup =========
    # `agents` holds the environment's agent names; they are used below as
    # keys into policy.policies.
    policy, optim, agents = _get_agents()    

    # ======== Step 3: Collector setup =========
    # Training transitions go into a prioritized replay buffer with one
    # sub-buffer per training environment.
    train_collector = Collector(
        policy,
        train_envs,
        # VectorReplayBuffer(300_000, len(train_envs)),
        PrioritizedVectorReplayBuffer( 300_000, len(train_envs), alpha=0.6, beta=0.4) , 
        #ListReplayBuffer(100000)       
        # buffer = StateMemoryVectorReplayBuffer(
        #         300_000,
        #         len(train_envs),  # Assuming train_envs is your vectorized environment
        #         memory_size=10,                
        #     ),
        exploration_noise=True             
    )
    # The test collector gets no buffer argument.
    test_collector = Collector(policy, test_envs, exploration_noise=True)
     
    print("Buffer Warming Up ")    
    # NOTE: the warm-up collection loop below is disabled, so no transitions
    # are collected here before the trainer starts.
    # for i in range(trainer_params["warmup_size"]):#int(trainer_params['batch_size'] / (300 * 10 ) )):
        
    #     train_collector.collect(n_episode=train_env_num)#,random=True) #trainer_params['batch_size'] * train_env_num))
    #     #train_collector.collect(n_step=300 * 10)
    #     print(".", end="") 
    
    # Buffer fill level expressed in units of max_cycles * n_pursuers transitions.
    len_buffer = len(train_collector.buffer) / (SISL_Config["max_cycles"] * SISL_Config["n_pursuers"])
    print("\nBuffer Lenght: ", len_buffer ) 
    
    # Summary dict; it is not referenced again within this cell.
    info = { "Buffer"  : "PriorizedReplayBuffer", " Warmup_ep" : len_buffer}
    # ======== wandb / tensorboard logging setup =========
    # The logger's write method was replaced near the top of the file by
    # new_write, which forwards every record to wandb.log.
    logger = WandbLogger(
        train_interval = runConfig["max_cycles"] * runConfig["n_pursuers"] ,
        test_interval = 1,#runConfig["max_cycles"] * runConfig["n_pursuers"],
        update_interval = runConfig["max_cycles"],
        save_interval = 1,
        write_flush = True,
        project = "SISL_Eval01",
        name = log_name,
        entity = None,
        run_id = log_name,
        config = runConfig,
        monitor_gym = True )
    
    # The tensorboard writer records the run configuration as text and is
    # attached to the wandb logger.
    writer = SummaryWriter(log_path)    
    writer.add_text("args", str(runConfig))    
    logger.load(writer)

    
    # Single-element list used as a mutable counter shared with the callbacks
    # below. It is incremented once per reward_metric call and is used both in
    # checkpoint file names and to decide when test_fn saves a checkpoint.
    global_step_holder = [0] 
    
    
    # ======== Step 4: Callback functions setup =========
    def save_best_fn(policy):                
        """Save the policy weights; passed to the trainer as ``save_best_fn``.

        With a shared policy one ``*_BestRew.pth`` file is written; otherwise
        one file per agent name. File names contain the current counter value.
        """
        
        if Policy_Config["same_policy"]:
            torch.save(policy.policies[agents[0]].state_dict(), model_save_path + "_" + str(global_step_holder[0]) + "_BestRew.pth")
            print("Best Saved Rew" , str(global_step_holder[0]))
        
        else:
            for n,agent in enumerate(agents):
                torch.save(policy.policies[agent].state_dict(), model_save_path + "_" + str(global_step_holder[0]) + "_" + agent + ".pth")
            
            print("Bests Saved Rew" , str(global_step_holder[0]))
        
    def save_test_best_fn(policy):                
        """Save the policy weights under a ``*_BestLen.pth`` name.

        Not handed to the trainer at the moment: the corresponding argument in
        the OffpolicyTrainer call below is commented out.
        """
        
        if Policy_Config["same_policy"]:
            torch.save(policy.policies[agents[0]].state_dict(), model_save_path + "_" + str(global_step_holder[0]) + "_BestLen.pth")
            print("Best Saved Length" , str(global_step_holder[0]))
        
        else:
            # NOTE(review): these per-agent file names are the same as the
            # ones written by save_best_fn for the same counter value.
            for n,agent in enumerate(agents):
                torch.save(policy.policies[agent].state_dict(), model_save_path + "_" + str(global_step_holder[0]) + "_" + agent + ".pth")
            
            print("Best Saved Length" , str(global_step_holder[0]))
        

    def stop_fn(mean_rewards):
        """Return True once ``mean_rewards`` reaches the stop threshold.

        NOTE(review): the threshold is presumably chosen to be unreachable so
        that training always runs for ``max_epoch`` epochs; confirm.
        """
        return mean_rewards >= 99999939.0

    def train_fn(epoch, env_step):
        """Set the training epsilon for the given epoch.

        Epsilon is interpolated linearly in ``epoch``: it equals
        ``tn_eps_max`` at epoch 0 and ``tn_eps_max / 100`` when ``epoch``
        equals ``max_epoch``. ``env_step`` is not used.
        """
        epsilon = trainer_params['tn_eps_max'] - (trainer_params['tn_eps_max'] - trainer_params['tn_eps_max']/100)*(epoch/trainer_params['max_epoch'])          
        if Policy_Config["same_policy"]:
            # All agents share one policy object, so setting it once is enough.
            policy.policies[agents[0]].set_eps(epsilon)
        else:
            for agent in agents:
                policy.policies[agent].set_eps(epsilon)
                
        
        # if env_step % 500 == 0:
            # logger.write("train/env_step", env_step, {"train/eps": eps})


    def test_fn(epoch, env_step):
        """Set the evaluation epsilon and periodically save a checkpoint.

        Epsilon is set to ``ts_eps_max``. A ``*_Step.pth`` checkpoint is
        written whenever the shared counter is a multiple of 10 at the time
        this callback runs. ``epoch`` and ``env_step`` are not used.
        """
               
        epsilon = trainer_params['ts_eps_max']#0.01#max(0.001, 0.1 - epoch * 0.001)
        if Policy_Config["same_policy"]:
            policy.policies[agents[0]].set_eps(epsilon)
        else:            
            for agent in agents:                             
                 policy.policies[agent].set_eps(epsilon)
                
        
        # The counter is advanced by reward_metric, not by this callback.
        if global_step_holder[0] % 10 == 0:
            
            if Policy_Config["same_policy"]:
                torch.save(policy.policies[agents[0]].state_dict(), model_save_path + "_" + str(global_step_holder[0]) + "_Step.pth")
                print("Steps Policy Saved " , str(global_step_holder[0]))
            
            else:
                for n,agent in enumerate(agents):
                    torch.save(policy.policies[agent].state_dict(), model_save_path + "_" + str(global_step_holder[0]) + "_" + agent + "Step" + str(global_step_holder[0]) + ".pth")
                
                print("Steps Policy Saved " , str(global_step_holder[0]))

        
    def reward_metric(rews):       
        """Advance the shared counter and reduce ``rews`` by summing over axis 1.

        NOTE(review): axis 1 is presumably the per-agent axis, which would
        make the result the team reward per episode; confirm against the
        collector output.
        """
                
        global_step_holder[0] +=1 
        return np.sum(rews, axis = 1)


    # # ======== Step 5: Run the trainer =========
    offPolicyTrainer = OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,        
        max_epoch=trainer_params['max_epoch'],
        step_per_epoch=trainer_params['step_per_epoch'],
        step_per_collect=trainer_params['step_per_collect'],        
        episode_per_test= trainer_params['episode_per_test'],
        batch_size=trainer_params['batch_size'],
        train_fn=train_fn,
        test_fn=test_fn,
        stop_fn=stop_fn,
        save_best_fn=save_best_fn,
        # save_test_best_fn=save_test_best_fn,
        update_per_step=trainer_params['update_per_step'],
        logger=logger,
        test_in_train=True,
        reward_metric=reward_metric,
        show_progress = True 
               
        )
    
    result = offPolicyTrainer.run()
    # The writer is closed only after the run returns.
    writer.close()
    # return result, policy.policies[agents[1]]
    print(f"\n==========Result==========\n{result}")
    print("\n(the trained policy can be accessed via policy.policies[agents[0]])")



Buffer Warming Up 

Buffer Lenght:  0.0


[34m[1mwandb[0m: Currently logged in as: [33mandrekuros[0m. Use [1m`wandb login --relogin`[0m to force relogin
  from IPython.core.display import HTML, display  # type: ignore


Steps Policy Saved  0
Best Saved Rew 1


Epoch #1: 20001it [01:39, 200.24it/s, env_step=20000, len=0, n/ep=0, n/st=800, pursuer_0/loss=0.798, pursuer_1/loss=0.760, pursuer_2/loss=1.176, pursuer_3/loss=0.955, pursuer_4/loss=0.760, pursuer_5/loss=0.913, pursuer_6/loss=1.148, pursuer_7/loss=0.996, rew=0.00]                           


Epoch #1: test_reward: -38.497375 ± 78.768020, best_reward: 48.445625 ± 117.122657 in #0


Epoch #2: 20001it [01:25, 234.50it/s, env_step=40000, len=4000, n/ep=6, n/st=800, pursuer_0/loss=1.364, pursuer_1/loss=1.351, pursuer_2/loss=1.622, pursuer_3/loss=1.503, pursuer_4/loss=1.525, pursuer_5/loss=1.535, pursuer_6/loss=1.614, pursuer_7/loss=1.791, rew=185.64]                           


Best Saved Rew 8
Epoch #2: test_reward: 116.899250 ± 113.813374, best_reward: 116.899250 ± 113.813374 in #2


Epoch #3: 20001it [01:30, 221.30it/s, env_step=60000, len=2056, n/ep=0, n/st=800, pursuer_0/loss=1.703, pursuer_1/loss=1.742, pursuer_2/loss=1.983, pursuer_3/loss=1.901, pursuer_4/loss=2.031, pursuer_5/loss=1.991, pursuer_6/loss=1.963, pursuer_7/loss=2.016, rew=274.08]                           


Best Saved Rew 10
Epoch #3: test_reward: 302.829875 ± 141.298814, best_reward: 302.829875 ± 141.298814 in #3


Epoch #4: 20001it [01:30, 222.07it/s, env_step=80000, len=4000, n/ep=4, n/st=800, pursuer_0/loss=1.963, pursuer_1/loss=1.815, pursuer_2/loss=2.076, pursuer_3/loss=2.000, pursuer_4/loss=1.995, pursuer_5/loss=2.005, pursuer_6/loss=2.215, pursuer_7/loss=2.142, rew=194.37]                           


Best Saved Rew 19
Epoch #4: test_reward: 392.230000 ± 137.611123, best_reward: 392.230000 ± 137.611123 in #4


Epoch #5: 20001it [01:28, 225.71it/s, env_step=100000, len=1704, n/ep=0, n/st=800, pursuer_0/loss=2.036, pursuer_1/loss=2.120, pursuer_2/loss=2.325, pursuer_3/loss=2.337, pursuer_4/loss=2.371, pursuer_5/loss=2.393, pursuer_6/loss=2.496, pursuer_7/loss=2.414, rew=400.76]                           


Epoch #5: test_reward: 375.843750 ± 169.139166, best_reward: 392.230000 ± 137.611123 in #4


Epoch #6: 20001it [01:27, 229.73it/s, env_step=120000, len=2264, n/ep=1, n/st=800, pursuer_0/loss=2.229, pursuer_1/loss=2.279, pursuer_2/loss=2.485, pursuer_3/loss=2.469, pursuer_4/loss=2.641, pursuer_5/loss=2.563, pursuer_6/loss=2.616, pursuer_7/loss=2.676, rew=507.23]                           


Best Saved Rew 37
Epoch #6: test_reward: 489.263312 ± 78.745256, best_reward: 489.263312 ± 78.745256 in #6


Epoch #7: 20001it [01:28, 226.50it/s, env_step=140000, len=2024, n/ep=1, n/st=800, pursuer_0/loss=2.355, pursuer_1/loss=2.324, pursuer_2/loss=2.453, pursuer_3/loss=2.597, pursuer_4/loss=2.767, pursuer_5/loss=2.753, pursuer_6/loss=2.614, pursuer_7/loss=2.801, rew=355.58]                           


Best Saved Rew 49
Epoch #7: test_reward: 500.506375 ± 88.845245, best_reward: 500.506375 ± 88.845245 in #7


Epoch #8: 20001it [01:27, 228.20it/s, env_step=160000, len=1864, n/ep=0, n/st=800, pursuer_0/loss=2.468, pursuer_1/loss=2.455, pursuer_2/loss=2.589, pursuer_3/loss=2.758, pursuer_4/loss=2.964, pursuer_5/loss=2.832, pursuer_6/loss=2.867, pursuer_7/loss=2.851, rew=462.15]                           


Best Saved Rew 60
Epoch #8: test_reward: 537.559625 ± 91.014617, best_reward: 537.559625 ± 91.014617 in #8


Epoch #9: 20001it [01:27, 227.39it/s, env_step=180000, len=1320, n/ep=1, n/st=800, pursuer_0/loss=2.695, pursuer_1/loss=2.521, pursuer_2/loss=2.861, pursuer_3/loss=2.934, pursuer_4/loss=3.128, pursuer_5/loss=3.040, pursuer_6/loss=2.956, pursuer_7/loss=3.031, rew=382.46]                           


Epoch #9: test_reward: 530.695500 ± 103.424653, best_reward: 537.559625 ± 91.014617 in #8


Epoch #10: 20001it [01:28, 226.70it/s, env_step=200000, len=1480, n/ep=0, n/st=800, pursuer_0/loss=2.780, pursuer_1/loss=2.745, pursuer_2/loss=2.995, pursuer_3/loss=3.017, pursuer_4/loss=3.143, pursuer_5/loss=3.258, pursuer_6/loss=3.204, pursuer_7/loss=3.150, rew=471.23]                           


Epoch #10: test_reward: 531.419312 ± 91.614346, best_reward: 537.559625 ± 91.014617 in #8


Epoch #11: 20001it [01:28, 226.96it/s, env_step=220000, len=2312, n/ep=0, n/st=800, pursuer_0/loss=2.889, pursuer_1/loss=2.736, pursuer_2/loss=3.123, pursuer_3/loss=2.938, pursuer_4/loss=3.305, pursuer_5/loss=3.216, pursuer_6/loss=3.223, pursuer_7/loss=3.192, rew=313.61]                           


Steps Policy Saved  90
Epoch #11: test_reward: 510.837625 ± 82.656625, best_reward: 537.559625 ± 91.014617 in #8


Epoch #12: 20001it [01:27, 229.27it/s, env_step=240000, len=1644, n/ep=0, n/st=800, pursuer_0/loss=2.852, pursuer_1/loss=2.709, pursuer_2/loss=3.103, pursuer_3/loss=3.117, pursuer_4/loss=3.139, pursuer_5/loss=3.212, pursuer_6/loss=3.184, pursuer_7/loss=3.205, rew=508.36]                           


Epoch #12: test_reward: 528.780687 ± 80.743146, best_reward: 537.559625 ± 91.014617 in #8


Epoch #13: 20001it [01:26, 230.58it/s, env_step=260000, len=1192, n/ep=0, n/st=800, pursuer_0/loss=2.953, pursuer_1/loss=2.762, pursuer_2/loss=3.118, pursuer_3/loss=3.194, pursuer_4/loss=3.292, pursuer_5/loss=3.345, pursuer_6/loss=3.416, pursuer_7/loss=3.445, rew=348.85]                           


Epoch #13: test_reward: 483.343312 ± 85.249801, best_reward: 537.559625 ± 91.014617 in #8


Epoch #14: 20001it [01:28, 227.17it/s, env_step=280000, len=1336, n/ep=0, n/st=800, pursuer_0/loss=2.908, pursuer_1/loss=2.808, pursuer_2/loss=3.187, pursuer_3/loss=3.220, pursuer_4/loss=3.417, pursuer_5/loss=3.447, pursuer_6/loss=3.404, pursuer_7/loss=3.547, rew=558.40]                           


Best Saved Rew 125
Epoch #14: test_reward: 555.656250 ± 77.508506, best_reward: 555.656250 ± 77.508506 in #14


Epoch #15: 20001it [01:27, 228.25it/s, env_step=300000, len=1256, n/ep=0, n/st=800, pursuer_0/loss=3.019, pursuer_1/loss=3.032, pursuer_2/loss=3.331, pursuer_3/loss=3.312, pursuer_4/loss=3.470, pursuer_5/loss=3.521, pursuer_6/loss=3.557, pursuer_7/loss=3.684, rew=510.95]                           


Epoch #15: test_reward: 482.341437 ± 70.150370, best_reward: 555.656250 ± 77.508506 in #14


Epoch #16: 20001it [01:25, 233.18it/s, env_step=320000, len=1412, n/ep=2, n/st=800, pursuer_0/loss=3.139, pursuer_1/loss=3.095, pursuer_2/loss=3.465, pursuer_3/loss=3.551, pursuer_4/loss=3.503, pursuer_5/loss=3.760, pursuer_6/loss=3.644, pursuer_7/loss=3.886, rew=403.23]                           


Epoch #16: test_reward: 523.643812 ± 76.982026, best_reward: 555.656250 ± 77.508506 in #14


Epoch #17: 20001it [01:26, 231.04it/s, env_step=340000, len=2104, n/ep=1, n/st=800, pursuer_0/loss=3.103, pursuer_1/loss=3.146, pursuer_2/loss=3.390, pursuer_3/loss=3.572, pursuer_4/loss=3.777, pursuer_5/loss=3.650, pursuer_6/loss=3.906, pursuer_7/loss=3.785, rew=365.55]                           


Epoch #17: test_reward: 489.918187 ± 79.631289, best_reward: 555.656250 ± 77.508506 in #14


Epoch #18: 20001it [01:26, 232.14it/s, env_step=360000, len=1824, n/ep=0, n/st=800, pursuer_0/loss=3.180, pursuer_1/loss=3.117, pursuer_2/loss=3.550, pursuer_3/loss=3.641, pursuer_4/loss=3.666, pursuer_5/loss=3.670, pursuer_6/loss=3.714, pursuer_7/loss=3.964, rew=516.25]                           


Epoch #18: test_reward: 496.984937 ± 68.625787, best_reward: 555.656250 ± 77.508506 in #14


Epoch #19: 20001it [01:26, 231.34it/s, env_step=380000, len=1464, n/ep=0, n/st=800, pursuer_0/loss=3.439, pursuer_1/loss=3.361, pursuer_2/loss=3.737, pursuer_3/loss=3.870, pursuer_4/loss=4.122, pursuer_5/loss=4.094, pursuer_6/loss=4.128, pursuer_7/loss=4.097, rew=436.24]                           


Epoch #19: test_reward: 538.750437 ± 79.114779, best_reward: 555.656250 ± 77.508506 in #14


Epoch #20: 20001it [01:25, 233.57it/s, env_step=400000, len=1512, n/ep=0, n/st=800, pursuer_0/loss=3.409, pursuer_1/loss=3.335, pursuer_2/loss=3.745, pursuer_3/loss=4.021, pursuer_4/loss=3.995, pursuer_5/loss=4.073, pursuer_6/loss=3.829, pursuer_7/loss=4.294, rew=454.20]                           


Epoch #20: test_reward: 500.893062 ± 59.530152, best_reward: 555.656250 ± 77.508506 in #14


Epoch #21: 20001it [01:26, 232.48it/s, env_step=420000, len=1200, n/ep=0, n/st=800, pursuer_0/loss=3.467, pursuer_1/loss=3.438, pursuer_2/loss=3.997, pursuer_3/loss=3.935, pursuer_4/loss=4.142, pursuer_5/loss=4.046, pursuer_6/loss=4.101, pursuer_7/loss=4.289, rew=634.68]                           


Epoch #21: test_reward: 553.850562 ± 80.598820, best_reward: 555.656250 ± 77.508506 in #14


Epoch #22: 20001it [01:26, 230.42it/s, env_step=440000, len=1464, n/ep=1, n/st=800, pursuer_0/loss=3.537, pursuer_1/loss=3.470, pursuer_2/loss=3.826, pursuer_3/loss=4.059, pursuer_4/loss=4.028, pursuer_5/loss=4.017, pursuer_6/loss=4.035, pursuer_7/loss=4.161, rew=547.18]                           


Epoch #22: test_reward: 518.532000 ± 97.034739, best_reward: 555.656250 ± 77.508506 in #14


Epoch #23: 20001it [01:34, 210.62it/s, env_step=460000, len=1784, n/ep=1, n/st=800, pursuer_0/loss=3.527, pursuer_1/loss=3.419, pursuer_2/loss=3.953, pursuer_3/loss=4.017, pursuer_4/loss=3.959, pursuer_5/loss=4.206, pursuer_6/loss=4.259, pursuer_7/loss=4.210, rew=624.42]                           


Best Saved Rew 232
Epoch #23: test_reward: 566.779437 ± 100.774821, best_reward: 566.779437 ± 100.774821 in #23


Epoch #24: 20001it [01:25, 232.98it/s, env_step=480000, len=1576, n/ep=1, n/st=800, pursuer_0/loss=3.586, pursuer_1/loss=3.519, pursuer_2/loss=4.034, pursuer_3/loss=4.119, pursuer_4/loss=4.167, pursuer_5/loss=4.226, pursuer_6/loss=4.167, pursuer_7/loss=4.358, rew=623.63]                           


Epoch #24: test_reward: 531.503562 ± 85.277104, best_reward: 566.779437 ± 100.774821 in #23


Epoch #25: 20001it [01:27, 228.10it/s, env_step=500000, len=1448, n/ep=0, n/st=800, pursuer_0/loss=3.549, pursuer_1/loss=3.557, pursuer_2/loss=4.007, pursuer_3/loss=4.232, pursuer_4/loss=4.242, pursuer_5/loss=4.242, pursuer_6/loss=4.264, pursuer_7/loss=4.524, rew=517.63]                           


Epoch #25: test_reward: 549.388062 ± 92.057134, best_reward: 566.779437 ± 100.774821 in #23


Epoch #26: 20001it [01:26, 230.71it/s, env_step=520000, len=1112, n/ep=1, n/st=800, pursuer_0/loss=3.669, pursuer_1/loss=3.604, pursuer_2/loss=4.108, pursuer_3/loss=4.264, pursuer_4/loss=4.342, pursuer_5/loss=4.313, pursuer_6/loss=4.387, pursuer_7/loss=4.586, rew=389.63]                           


Epoch #26: test_reward: 511.620125 ± 79.714440, best_reward: 566.779437 ± 100.774821 in #23


Epoch #27: 20001it [01:26, 230.37it/s, env_step=540000, len=2040, n/ep=1, n/st=800, pursuer_0/loss=3.651, pursuer_1/loss=3.485, pursuer_2/loss=4.086, pursuer_3/loss=4.186, pursuer_4/loss=4.272, pursuer_5/loss=4.311, pursuer_6/loss=4.298, pursuer_7/loss=4.422, rew=578.02]                           


Best Saved Rew 279
Epoch #27: test_reward: 571.816687 ± 79.546905, best_reward: 571.816687 ± 79.546905 in #27


Epoch #28: 20001it [01:29, 223.41it/s, env_step=560000, len=1400, n/ep=1, n/st=800, pursuer_0/loss=3.487, pursuer_1/loss=3.439, pursuer_2/loss=3.959, pursuer_3/loss=4.088, pursuer_4/loss=4.232, pursuer_5/loss=4.184, pursuer_6/loss=4.363, pursuer_7/loss=4.345, rew=651.72]                           


Best Saved Rew 292
Epoch #28: test_reward: 623.134937 ± 109.352836, best_reward: 623.134937 ± 109.352836 in #28


Epoch #29: 20001it [01:29, 224.02it/s, env_step=580000, len=1212, n/ep=0, n/st=800, pursuer_0/loss=3.568, pursuer_1/loss=3.538, pursuer_2/loss=3.935, pursuer_3/loss=4.134, pursuer_4/loss=4.313, pursuer_5/loss=4.235, pursuer_6/loss=4.163, pursuer_7/loss=4.367, rew=567.74]                           


Epoch #29: test_reward: 551.384625 ± 87.674356, best_reward: 623.134937 ± 109.352836 in #28


Epoch #30: 20001it [01:28, 225.05it/s, env_step=600000, len=1800, n/ep=0, n/st=800, pursuer_0/loss=3.695, pursuer_1/loss=3.528, pursuer_2/loss=3.962, pursuer_3/loss=4.119, pursuer_4/loss=4.253, pursuer_5/loss=4.143, pursuer_6/loss=4.211, pursuer_7/loss=4.421, rew=393.01]                           


Epoch #30: test_reward: 530.094187 ± 104.006043, best_reward: 623.134937 ± 109.352836 in #28


Epoch #31: 20001it [01:32, 216.66it/s, env_step=620000, len=1504, n/ep=0, n/st=800, pursuer_0/loss=3.530, pursuer_1/loss=3.580, pursuer_2/loss=3.958, pursuer_3/loss=4.008, pursuer_4/loss=4.360, pursuer_5/loss=4.252, pursuer_6/loss=4.107, pursuer_7/loss=4.424, rew=600.72]                           


Epoch #31: test_reward: 578.187062 ± 95.102389, best_reward: 623.134937 ± 109.352836 in #28


Epoch #32: 20001it [01:34, 210.99it/s, env_step=640000, len=2064, n/ep=0, n/st=800, pursuer_0/loss=3.539, pursuer_1/loss=3.579, pursuer_2/loss=3.962, pursuer_3/loss=4.077, pursuer_4/loss=4.154, pursuer_5/loss=4.204, pursuer_6/loss=4.184, pursuer_7/loss=4.416, rew=320.08]                           


Epoch #32: test_reward: 585.217062 ± 90.042286, best_reward: 623.134937 ± 109.352836 in #28


Epoch #33: 20001it [01:26, 231.87it/s, env_step=660000, len=1088, n/ep=0, n/st=800, pursuer_0/loss=3.578, pursuer_1/loss=3.629, pursuer_2/loss=3.932, pursuer_3/loss=4.117, pursuer_4/loss=4.398, pursuer_5/loss=4.326, pursuer_6/loss=4.016, pursuer_7/loss=4.617, rew=651.67]                           


Epoch #33: test_reward: 554.997562 ± 99.688103, best_reward: 623.134937 ± 109.352836 in #28


Epoch #34: 20001it [01:32, 215.23it/s, env_step=680000, len=1464, n/ep=0, n/st=800, pursuer_0/loss=3.749, pursuer_1/loss=3.858, pursuer_2/loss=4.174, pursuer_3/loss=4.292, pursuer_4/loss=4.473, pursuer_5/loss=4.379, pursuer_6/loss=4.347, pursuer_7/loss=4.578, rew=554.00]                           


Epoch #34: test_reward: 568.627375 ± 119.511261, best_reward: 623.134937 ± 109.352836 in #28


Epoch #35: 20001it [01:31, 218.59it/s, env_step=700000, len=1648, n/ep=0, n/st=800, pursuer_0/loss=3.625, pursuer_1/loss=3.870, pursuer_2/loss=4.105, pursuer_3/loss=4.250, pursuer_4/loss=4.487, pursuer_5/loss=4.275, pursuer_6/loss=4.407, pursuer_7/loss=4.562, rew=473.81]                           


Epoch #35: test_reward: 572.255000 ± 95.422960, best_reward: 623.134937 ± 109.352836 in #28


Epoch #36: 20001it [01:25, 234.38it/s, env_step=720000, len=1496, n/ep=1, n/st=800, pursuer_0/loss=3.708, pursuer_1/loss=3.769, pursuer_2/loss=4.178, pursuer_3/loss=4.146, pursuer_4/loss=4.385, pursuer_5/loss=4.379, pursuer_6/loss=4.432, pursuer_7/loss=4.577, rew=492.44]                           


Epoch #36: test_reward: 609.815625 ± 143.445954, best_reward: 623.134937 ± 109.352836 in #28


Epoch #37: 20001it [01:25, 234.05it/s, env_step=740000, len=1636, n/ep=2, n/st=800, pursuer_0/loss=3.830, pursuer_1/loss=3.856, pursuer_2/loss=4.227, pursuer_3/loss=4.124, pursuer_4/loss=4.555, pursuer_5/loss=4.212, pursuer_6/loss=4.454, pursuer_7/loss=4.518, rew=561.95]                           


Best Saved Rew 410
Epoch #37: test_reward: 661.870625 ± 126.197348, best_reward: 661.870625 ± 126.197348 in #37


Epoch #38: 20001it [01:23, 239.91it/s, env_step=760000, len=1360, n/ep=1, n/st=800, pursuer_0/loss=3.671, pursuer_1/loss=3.710, pursuer_2/loss=4.061, pursuer_3/loss=4.077, pursuer_4/loss=4.343, pursuer_5/loss=4.138, pursuer_6/loss=4.241, pursuer_7/loss=4.489, rew=669.60]                           


Epoch #38: test_reward: 613.946625 ± 106.593127, best_reward: 661.870625 ± 126.197348 in #37


Epoch #39: 20001it [01:24, 237.58it/s, env_step=780000, len=1120, n/ep=0, n/st=800, pursuer_0/loss=3.649, pursuer_1/loss=3.750, pursuer_2/loss=3.993, pursuer_3/loss=4.317, pursuer_4/loss=4.381, pursuer_5/loss=4.107, pursuer_6/loss=4.366, pursuer_7/loss=4.402, rew=464.03]                           


Epoch #39: test_reward: 620.758000 ± 121.859499, best_reward: 661.870625 ± 126.197348 in #37


Epoch #40: 20001it [01:27, 227.87it/s, env_step=800000, len=848, n/ep=1, n/st=800, pursuer_0/loss=3.571, pursuer_1/loss=3.631, pursuer_2/loss=3.980, pursuer_3/loss=4.072, pursuer_4/loss=4.370, pursuer_5/loss=4.036, pursuer_6/loss=4.178, pursuer_7/loss=4.275, rew=475.45]                            


Epoch #40: test_reward: 654.886500 ± 109.548513, best_reward: 661.870625 ± 126.197348 in #37


Epoch #41: 20001it [01:28, 224.74it/s, env_step=820000, len=1336, n/ep=0, n/st=800, pursuer_0/loss=3.770, pursuer_1/loss=3.656, pursuer_2/loss=4.137, pursuer_3/loss=4.123, pursuer_4/loss=4.480, pursuer_5/loss=4.199, pursuer_6/loss=4.294, pursuer_7/loss=4.476, rew=732.99]                           


Epoch #41: test_reward: 615.148437 ± 129.191964, best_reward: 661.870625 ± 126.197348 in #37


Epoch #42: 20001it [01:32, 215.28it/s, env_step=840000, len=1616, n/ep=0, n/st=800, pursuer_0/loss=3.681, pursuer_1/loss=3.855, pursuer_2/loss=3.983, pursuer_3/loss=4.226, pursuer_4/loss=4.530, pursuer_5/loss=4.369, pursuer_6/loss=4.364, pursuer_7/loss=4.659, rew=685.92]                           


Epoch #42: test_reward: 637.095250 ± 128.241442, best_reward: 661.870625 ± 126.197348 in #37


Epoch #43: 20001it [01:29, 223.06it/s, env_step=860000, len=1328, n/ep=0, n/st=800, pursuer_0/loss=3.683, pursuer_1/loss=3.715, pursuer_2/loss=4.106, pursuer_3/loss=4.079, pursuer_4/loss=4.288, pursuer_5/loss=4.303, pursuer_6/loss=4.281, pursuer_7/loss=4.414, rew=639.18]                           


Epoch #43: test_reward: 620.995812 ± 99.961612, best_reward: 661.870625 ± 126.197348 in #37


Epoch #44: 20001it [01:31, 217.59it/s, env_step=880000, len=1468, n/ep=2, n/st=800, pursuer_0/loss=4.026, pursuer_1/loss=4.000, pursuer_2/loss=4.383, pursuer_3/loss=4.356, pursuer_4/loss=4.649, pursuer_5/loss=4.424, pursuer_6/loss=4.604, pursuer_7/loss=4.732, rew=637.56]                           


Epoch #44: test_reward: 655.601125 ± 80.257712, best_reward: 661.870625 ± 126.197348 in #37


Epoch #45: 20001it [01:31, 218.10it/s, env_step=900000, len=1696, n/ep=0, n/st=800, pursuer_0/loss=3.918, pursuer_1/loss=3.801, pursuer_2/loss=4.259, pursuer_3/loss=4.389, pursuer_4/loss=4.633, pursuer_5/loss=4.467, pursuer_6/loss=4.520, pursuer_7/loss=4.677, rew=617.75]                           


Epoch #45: test_reward: 623.973312 ± 117.650093, best_reward: 661.870625 ± 126.197348 in #37


Epoch #46: 20001it [01:29, 222.95it/s, env_step=920000, len=1056, n/ep=2, n/st=800, pursuer_0/loss=4.268, pursuer_1/loss=4.206, pursuer_2/loss=4.434, pursuer_3/loss=4.703, pursuer_4/loss=4.764, pursuer_5/loss=5.117, pursuer_6/loss=4.917, pursuer_7/loss=4.990, rew=657.09]                           


Epoch #46: test_reward: 551.821187 ± 105.987660, best_reward: 661.870625 ± 126.197348 in #37


Epoch #47: 20001it [01:31, 219.08it/s, env_step=940000, len=2016, n/ep=0, n/st=800, pursuer_0/loss=4.202, pursuer_1/loss=4.043, pursuer_2/loss=4.426, pursuer_3/loss=4.413, pursuer_4/loss=4.758, pursuer_5/loss=4.647, pursuer_6/loss=4.648, pursuer_7/loss=4.904, rew=492.73]                           


Epoch #47: test_reward: 577.345875 ± 136.210452, best_reward: 661.870625 ± 126.197348 in #37


Epoch #48: 20001it [01:25, 234.01it/s, env_step=960000, len=1056, n/ep=1, n/st=800, pursuer_0/loss=4.063, pursuer_1/loss=3.994, pursuer_2/loss=4.348, pursuer_3/loss=4.475, pursuer_4/loss=4.677, pursuer_5/loss=4.542, pursuer_6/loss=4.500, pursuer_7/loss=4.650, rew=410.31]                           


Epoch #48: test_reward: 642.413125 ± 126.871370, best_reward: 661.870625 ± 126.197348 in #37


Epoch #49: 20001it [01:27, 229.58it/s, env_step=980000, len=1360, n/ep=0, n/st=800, pursuer_0/loss=4.228, pursuer_1/loss=3.996, pursuer_2/loss=4.473, pursuer_3/loss=4.732, pursuer_4/loss=4.865, pursuer_5/loss=4.793, pursuer_6/loss=4.805, pursuer_7/loss=4.831, rew=645.37]                           


Steps Policy Saved  570
Epoch #49: test_reward: 596.474062 ± 81.621024, best_reward: 661.870625 ± 126.197348 in #37


Epoch #50: 20001it [01:25, 234.48it/s, env_step=1000000, len=1336, n/ep=1, n/st=800, pursuer_0/loss=4.113, pursuer_1/loss=4.015, pursuer_2/loss=4.405, pursuer_3/loss=4.440, pursuer_4/loss=4.687, pursuer_5/loss=4.558, pursuer_6/loss=4.535, pursuer_7/loss=4.771, rew=509.25]                           


Epoch #50: test_reward: 526.624125 ± 97.027345, best_reward: 661.870625 ± 126.197348 in #37


Epoch #51: 20001it [01:25, 234.00it/s, env_step=1020000, len=1480, n/ep=1, n/st=800, pursuer_0/loss=3.898, pursuer_1/loss=3.947, pursuer_2/loss=4.303, pursuer_3/loss=4.466, pursuer_4/loss=4.643, pursuer_5/loss=4.491, pursuer_6/loss=4.503, pursuer_7/loss=4.698, rew=593.59]                           


Epoch #51: test_reward: 580.702187 ± 111.487104, best_reward: 661.870625 ± 126.197348 in #37


Epoch #52: 20001it [01:26, 231.22it/s, env_step=1040000, len=1256, n/ep=0, n/st=800, pursuer_0/loss=3.818, pursuer_1/loss=3.773, pursuer_2/loss=4.237, pursuer_3/loss=4.568, pursuer_4/loss=4.670, pursuer_5/loss=4.372, pursuer_6/loss=4.561, pursuer_7/loss=4.694, rew=539.96]                           


Epoch #52: test_reward: 555.575625 ± 74.963844, best_reward: 661.870625 ± 126.197348 in #37


Epoch #53: 20001it [01:25, 233.39it/s, env_step=1060000, len=1320, n/ep=1, n/st=800, pursuer_0/loss=3.914, pursuer_1/loss=3.837, pursuer_2/loss=4.298, pursuer_3/loss=4.520, pursuer_4/loss=4.681, pursuer_5/loss=4.464, pursuer_6/loss=4.557, pursuer_7/loss=4.690, rew=434.90]                           


Epoch #53: test_reward: 588.032625 ± 108.317009, best_reward: 661.870625 ± 126.197348 in #37


Epoch #54: 20001it [01:29, 223.74it/s, env_step=1080000, len=1248, n/ep=0, n/st=800, pursuer_0/loss=3.952, pursuer_1/loss=3.918, pursuer_2/loss=4.232, pursuer_3/loss=4.559, pursuer_4/loss=4.785, pursuer_5/loss=4.598, pursuer_6/loss=4.421, pursuer_7/loss=4.760, rew=490.59]                           


Epoch #54: test_reward: 606.563000 ± 131.395430, best_reward: 661.870625 ± 126.197348 in #37


Epoch #55: 20001it [01:32, 215.56it/s, env_step=1100000, len=1424, n/ep=1, n/st=800, pursuer_0/loss=3.858, pursuer_1/loss=3.914, pursuer_2/loss=4.334, pursuer_3/loss=4.619, pursuer_4/loss=4.554, pursuer_5/loss=4.611, pursuer_6/loss=4.474, pursuer_7/loss=4.514, rew=719.48]                           


Epoch #55: test_reward: 538.676250 ± 99.188853, best_reward: 661.870625 ± 126.197348 in #37


Epoch #56: 20001it [01:30, 220.66it/s, env_step=1120000, len=1560, n/ep=0, n/st=800, pursuer_0/loss=3.997, pursuer_1/loss=3.953, pursuer_2/loss=4.280, pursuer_3/loss=4.506, pursuer_4/loss=4.644, pursuer_5/loss=4.827, pursuer_6/loss=4.528, pursuer_7/loss=4.671, rew=650.59]                           


Epoch #56: test_reward: 591.569812 ± 114.206470, best_reward: 661.870625 ± 126.197348 in #37


Epoch #57: 20001it [01:30, 222.12it/s, env_step=1140000, len=1208, n/ep=0, n/st=800, pursuer_0/loss=3.823, pursuer_1/loss=3.933, pursuer_2/loss=4.418, pursuer_3/loss=4.633, pursuer_4/loss=4.632, pursuer_5/loss=4.605, pursuer_6/loss=4.481, pursuer_7/loss=4.610, rew=680.32]                           


Epoch #57: test_reward: 533.964125 ± 78.823035, best_reward: 661.870625 ± 126.197348 in #37


Epoch #58: 20001it [01:30, 221.15it/s, env_step=1160000, len=1341, n/ep=0, n/st=800, pursuer_0/loss=4.104, pursuer_1/loss=3.939, pursuer_2/loss=4.311, pursuer_3/loss=4.501, pursuer_4/loss=4.727, pursuer_5/loss=4.476, pursuer_6/loss=4.398, pursuer_7/loss=4.746, rew=499.49]                           


Epoch #58: test_reward: 616.861437 ± 124.672362, best_reward: 661.870625 ± 126.197348 in #37


Epoch #59: 20001it [01:32, 216.85it/s, env_step=1180000, len=1496, n/ep=0, n/st=800, pursuer_0/loss=3.968, pursuer_1/loss=3.926, pursuer_2/loss=4.285, pursuer_3/loss=4.544, pursuer_4/loss=4.681, pursuer_5/loss=4.626, pursuer_6/loss=4.535, pursuer_7/loss=4.871, rew=575.17]                           


Epoch #59: test_reward: 562.203687 ± 81.295943, best_reward: 661.870625 ± 126.197348 in #37


Epoch #60: 20001it [01:31, 218.43it/s, env_step=1200000, len=1560, n/ep=1, n/st=800, pursuer_0/loss=3.848, pursuer_1/loss=3.862, pursuer_2/loss=4.345, pursuer_3/loss=4.490, pursuer_4/loss=4.592, pursuer_5/loss=4.590, pursuer_6/loss=4.468, pursuer_7/loss=4.767, rew=655.51]                           


Steps Policy Saved  720
Epoch #60: test_reward: 521.131750 ± 110.306456, best_reward: 661.870625 ± 126.197348 in #37


Epoch #61: 20001it [01:29, 223.15it/s, env_step=1220000, len=1600, n/ep=1, n/st=800, pursuer_0/loss=3.967, pursuer_1/loss=3.993, pursuer_2/loss=4.396, pursuer_3/loss=4.582, pursuer_4/loss=4.577, pursuer_5/loss=4.712, pursuer_6/loss=4.510, pursuer_7/loss=4.718, rew=743.28]                           


Epoch #61: test_reward: 530.340500 ± 73.163089, best_reward: 661.870625 ± 126.197348 in #37


Epoch #62: 20001it [01:27, 228.55it/s, env_step=1240000, len=1472, n/ep=1, n/st=800, pursuer_0/loss=4.021, pursuer_1/loss=4.159, pursuer_2/loss=4.406, pursuer_3/loss=4.675, pursuer_4/loss=4.772, pursuer_5/loss=4.551, pursuer_6/loss=4.816, pursuer_7/loss=4.911, rew=637.56]                           


Epoch #62: test_reward: 580.753062 ± 112.523538, best_reward: 661.870625 ± 126.197348 in #37


Epoch #63: 20001it [01:27, 229.01it/s, env_step=1260000, len=1552, n/ep=0, n/st=800, pursuer_0/loss=3.821, pursuer_1/loss=3.878, pursuer_2/loss=4.229, pursuer_3/loss=4.464, pursuer_4/loss=4.523, pursuer_5/loss=4.521, pursuer_6/loss=4.320, pursuer_7/loss=4.582, rew=663.23]                           


Epoch #63: test_reward: 599.735125 ± 113.227310, best_reward: 661.870625 ± 126.197348 in #37


Epoch #64: 20001it [01:28, 226.69it/s, env_step=1280000, len=1400, n/ep=1, n/st=800, pursuer_0/loss=3.729, pursuer_1/loss=4.025, pursuer_2/loss=4.362, pursuer_3/loss=4.557, pursuer_4/loss=4.458, pursuer_5/loss=4.516, pursuer_6/loss=4.456, pursuer_7/loss=4.684, rew=576.82]                           


Epoch #64: test_reward: 573.606187 ± 118.384300, best_reward: 661.870625 ± 126.197348 in #37


Epoch #65: 20001it [01:30, 219.88it/s, env_step=1300000, len=1552, n/ep=1, n/st=800, pursuer_0/loss=3.575, pursuer_1/loss=3.789, pursuer_2/loss=4.172, pursuer_3/loss=4.381, pursuer_4/loss=4.393, pursuer_5/loss=4.139, pursuer_6/loss=4.341, pursuer_7/loss=4.497, rew=717.81]                           


Steps Policy Saved  780
Epoch #65: test_reward: 560.533187 ± 123.055387, best_reward: 661.870625 ± 126.197348 in #37


Epoch #66: 20001it [01:32, 215.30it/s, env_step=1320000, len=1448, n/ep=1, n/st=800, pursuer_0/loss=3.415, pursuer_1/loss=3.642, pursuer_2/loss=4.204, pursuer_3/loss=4.370, pursuer_4/loss=4.175, pursuer_5/loss=4.140, pursuer_6/loss=4.231, pursuer_7/loss=4.419, rew=527.76]                           


Epoch #66: test_reward: 570.952562 ± 83.211292, best_reward: 661.870625 ± 126.197348 in #37


Epoch #67: 20001it [01:30, 221.70it/s, env_step=1340000, len=1216, n/ep=0, n/st=800, pursuer_0/loss=3.734, pursuer_1/loss=4.011, pursuer_2/loss=4.232, pursuer_3/loss=4.558, pursuer_4/loss=4.464, pursuer_5/loss=4.415, pursuer_6/loss=4.509, pursuer_7/loss=4.733, rew=699.04]                           


Epoch #67: test_reward: 575.119375 ± 94.563419, best_reward: 661.870625 ± 126.197348 in #37


Epoch #68: 20001it [01:31, 219.16it/s, env_step=1360000, len=888, n/ep=1, n/st=800, pursuer_0/loss=3.617, pursuer_1/loss=3.839, pursuer_2/loss=4.081, pursuer_3/loss=4.441, pursuer_4/loss=4.331, pursuer_5/loss=4.359, pursuer_6/loss=4.542, pursuer_7/loss=4.675, rew=546.39]                            


Steps Policy Saved  820
Epoch #68: test_reward: 561.176437 ± 101.083058, best_reward: 661.870625 ± 126.197348 in #37


Epoch #69: 20001it [01:32, 216.47it/s, env_step=1380000, len=1344, n/ep=0, n/st=800, pursuer_0/loss=3.696, pursuer_1/loss=4.001, pursuer_2/loss=4.332, pursuer_3/loss=4.593, pursuer_4/loss=4.338, pursuer_5/loss=4.481, pursuer_6/loss=4.513, pursuer_7/loss=4.608, rew=465.67]                           


Epoch #69: test_reward: 530.662937 ± 92.263640, best_reward: 661.870625 ± 126.197348 in #37


Epoch #70: 20001it [01:34, 212.43it/s, env_step=1400000, len=1376, n/ep=1, n/st=800, pursuer_0/loss=3.700, pursuer_1/loss=4.023, pursuer_2/loss=4.159, pursuer_3/loss=4.630, pursuer_4/loss=4.491, pursuer_5/loss=4.335, pursuer_6/loss=4.808, pursuer_7/loss=4.921, rew=538.00]                           


Epoch #70: test_reward: 602.290937 ± 99.692203, best_reward: 661.870625 ± 126.197348 in #37


Epoch #71: 20001it [01:30, 221.23it/s, env_step=1420000, len=1128, n/ep=2, n/st=800, pursuer_0/loss=3.721, pursuer_1/loss=4.033, pursuer_2/loss=4.291, pursuer_3/loss=4.392, pursuer_4/loss=4.357, pursuer_5/loss=4.427, pursuer_6/loss=4.566, pursuer_7/loss=4.785, rew=525.42]                           


Steps Policy Saved  860
Epoch #71: test_reward: 551.266562 ± 105.133211, best_reward: 661.870625 ± 126.197348 in #37


Epoch #72: 20001it [01:25, 234.41it/s, env_step=1440000, len=1220, n/ep=2, n/st=800, pursuer_0/loss=3.688, pursuer_1/loss=3.884, pursuer_2/loss=4.237, pursuer_3/loss=4.533, pursuer_4/loss=4.382, pursuer_5/loss=4.461, pursuer_6/loss=4.636, pursuer_7/loss=4.746, rew=581.94]                           


Epoch #72: test_reward: 570.458187 ± 88.654123, best_reward: 661.870625 ± 126.197348 in #37


Epoch #73: 20001it [01:32, 215.99it/s, env_step=1460000, len=1296, n/ep=1, n/st=800, pursuer_0/loss=3.673, pursuer_1/loss=4.067, pursuer_2/loss=4.312, pursuer_3/loss=4.574, pursuer_4/loss=4.535, pursuer_5/loss=4.514, pursuer_6/loss=4.782, pursuer_7/loss=4.835, rew=721.48]                           


Epoch #73: test_reward: 544.623562 ± 116.760380, best_reward: 661.870625 ± 126.197348 in #37


Epoch #74: 20001it [01:22, 241.56it/s, env_step=1480000, len=1400, n/ep=1, n/st=800, pursuer_0/loss=3.563, pursuer_1/loss=3.919, pursuer_2/loss=4.212, pursuer_3/loss=4.538, pursuer_4/loss=4.455, pursuer_5/loss=4.535, pursuer_6/loss=4.530, pursuer_7/loss=4.652, rew=475.90]                           


Epoch #74: test_reward: 564.313312 ± 85.600302, best_reward: 661.870625 ± 126.197348 in #37


Epoch #75: 20001it [01:22, 241.50it/s, env_step=1500000, len=1224, n/ep=1, n/st=800, pursuer_0/loss=3.716, pursuer_1/loss=4.172, pursuer_2/loss=4.487, pursuer_3/loss=4.728, pursuer_4/loss=4.539, pursuer_5/loss=4.640, pursuer_6/loss=4.873, pursuer_7/loss=4.877, rew=539.55]                           


Epoch #75: test_reward: 551.951437 ± 87.673068, best_reward: 661.870625 ± 126.197348 in #37


Epoch #76: 20001it [01:22, 242.15it/s, env_step=1520000, len=1712, n/ep=1, n/st=800, pursuer_0/loss=3.678, pursuer_1/loss=4.021, pursuer_2/loss=4.227, pursuer_3/loss=4.617, pursuer_4/loss=4.571, pursuer_5/loss=4.480, pursuer_6/loss=4.720, pursuer_7/loss=4.778, rew=462.48]                           


Epoch #76: test_reward: 592.935562 ± 113.464817, best_reward: 661.870625 ± 126.197348 in #37


Epoch #77: 20001it [01:23, 239.48it/s, env_step=1540000, len=1000, n/ep=0, n/st=800, pursuer_0/loss=3.659, pursuer_1/loss=4.006, pursuer_2/loss=4.344, pursuer_3/loss=4.556, pursuer_4/loss=4.551, pursuer_5/loss=4.646, pursuer_6/loss=4.872, pursuer_7/loss=4.836, rew=495.36]                           


Epoch #77: test_reward: 567.005250 ± 99.835279, best_reward: 661.870625 ± 126.197348 in #37


Epoch #78: 20001it [01:31, 218.25it/s, env_step=1560000, len=704, n/ep=1, n/st=800, pursuer_0/loss=3.802, pursuer_1/loss=4.061, pursuer_2/loss=4.397, pursuer_3/loss=4.466, pursuer_4/loss=4.528, pursuer_5/loss=4.551, pursuer_6/loss=4.878, pursuer_7/loss=4.835, rew=503.53]                            


Epoch #78: test_reward: 642.204562 ± 122.949595, best_reward: 661.870625 ± 126.197348 in #37


Epoch #79: 20001it [01:26, 232.18it/s, env_step=1580000, len=1096, n/ep=1, n/st=800, pursuer_0/loss=3.665, pursuer_1/loss=4.042, pursuer_2/loss=4.418, pursuer_3/loss=4.469, pursuer_4/loss=4.561, pursuer_5/loss=4.584, pursuer_6/loss=4.935, pursuer_7/loss=4.751, rew=726.35]                           


Epoch #79: test_reward: 545.869562 ± 122.099985, best_reward: 661.870625 ± 126.197348 in #37


Epoch #80: 20001it [01:26, 230.65it/s, env_step=1600000, len=1336, n/ep=1, n/st=800, pursuer_0/loss=3.729, pursuer_1/loss=4.119, pursuer_2/loss=4.298, pursuer_3/loss=4.485, pursuer_4/loss=4.697, pursuer_5/loss=4.601, pursuer_6/loss=4.896, pursuer_7/loss=4.693, rew=443.64]                           


Epoch #80: test_reward: 576.139375 ± 124.574912, best_reward: 661.870625 ± 126.197348 in #37


Epoch #81: 20001it [01:28, 227.18it/s, env_step=1620000, len=1296, n/ep=0, n/st=800, pursuer_0/loss=3.795, pursuer_1/loss=4.113, pursuer_2/loss=4.542, pursuer_3/loss=4.557, pursuer_4/loss=4.642, pursuer_5/loss=4.832, pursuer_6/loss=5.120, pursuer_7/loss=4.930, rew=508.27]                           


Epoch #81: test_reward: 589.560687 ± 119.010639, best_reward: 661.870625 ± 126.197348 in #37


Epoch #82: 20001it [01:25, 232.63it/s, env_step=1640000, len=1352, n/ep=1, n/st=800, pursuer_0/loss=3.938, pursuer_1/loss=3.986, pursuer_2/loss=4.355, pursuer_3/loss=4.435, pursuer_4/loss=4.670, pursuer_5/loss=4.785, pursuer_6/loss=5.105, pursuer_7/loss=4.686, rew=661.15]                           


Epoch #82: test_reward: 570.620250 ± 110.914398, best_reward: 661.870625 ± 126.197348 in #37


Epoch #83: 20001it [01:23, 240.36it/s, env_step=1660000, len=1240, n/ep=1, n/st=800, pursuer_0/loss=3.679, pursuer_1/loss=3.974, pursuer_2/loss=4.396, pursuer_3/loss=4.255, pursuer_4/loss=4.592, pursuer_5/loss=4.742, pursuer_6/loss=4.714, pursuer_7/loss=4.762, rew=593.14]                           


Epoch #83: test_reward: 530.595375 ± 106.759352, best_reward: 661.870625 ± 126.197348 in #37


Epoch #84: 20001it [01:31, 218.41it/s, env_step=1680000, len=1288, n/ep=0, n/st=800, pursuer_0/loss=3.772, pursuer_1/loss=3.833, pursuer_2/loss=4.314, pursuer_3/loss=4.347, pursuer_4/loss=4.638, pursuer_5/loss=4.480, pursuer_6/loss=4.833, pursuer_7/loss=4.558, rew=597.16]                           


Epoch #84: test_reward: 550.938062 ± 84.477699, best_reward: 661.870625 ± 126.197348 in #37


Epoch #85: 20001it [01:29, 222.47it/s, env_step=1700000, len=1372, n/ep=0, n/st=800, pursuer_0/loss=4.048, pursuer_1/loss=4.024, pursuer_2/loss=4.496, pursuer_3/loss=4.383, pursuer_4/loss=4.550, pursuer_5/loss=4.677, pursuer_6/loss=4.798, pursuer_7/loss=4.848, rew=496.48]                           


Epoch #85: test_reward: 599.836250 ± 137.620955, best_reward: 661.870625 ± 126.197348 in #37


Epoch #86: 20001it [01:26, 230.33it/s, env_step=1720000, len=1048, n/ep=0, n/st=800, pursuer_0/loss=3.891, pursuer_1/loss=3.823, pursuer_2/loss=4.277, pursuer_3/loss=4.155, pursuer_4/loss=4.267, pursuer_5/loss=4.375, pursuer_6/loss=4.719, pursuer_7/loss=4.475, rew=481.47]                           


Epoch #86: test_reward: 566.358375 ± 120.598933, best_reward: 661.870625 ± 126.197348 in #37


Epoch #87: 20001it [01:28, 226.15it/s, env_step=1740000, len=1568, n/ep=0, n/st=800, pursuer_0/loss=3.863, pursuer_1/loss=3.803, pursuer_2/loss=4.192, pursuer_3/loss=4.294, pursuer_4/loss=4.331, pursuer_5/loss=4.572, pursuer_6/loss=4.473, pursuer_7/loss=4.624, rew=699.48]                           


Epoch #87: test_reward: 545.112812 ± 105.768229, best_reward: 661.870625 ± 126.197348 in #37


Epoch #88: 20001it [01:27, 227.86it/s, env_step=1760000, len=1408, n/ep=0, n/st=800, pursuer_0/loss=3.825, pursuer_1/loss=3.778, pursuer_2/loss=4.145, pursuer_3/loss=4.132, pursuer_4/loss=4.420, pursuer_5/loss=4.551, pursuer_6/loss=4.446, pursuer_7/loss=4.624, rew=605.91]                           


Epoch #88: test_reward: 586.024062 ± 118.535093, best_reward: 661.870625 ± 126.197348 in #37


Epoch #89: 20001it [01:29, 224.43it/s, env_step=1780000, len=848, n/ep=0, n/st=800, pursuer_0/loss=3.887, pursuer_1/loss=3.924, pursuer_2/loss=4.249, pursuer_3/loss=4.216, pursuer_4/loss=4.313, pursuer_5/loss=4.459, pursuer_6/loss=4.526, pursuer_7/loss=4.390, rew=455.50]                            


Epoch #89: test_reward: 545.303375 ± 105.045553, best_reward: 661.870625 ± 126.197348 in #37


Epoch #90: 20001it [01:27, 229.15it/s, env_step=1800000, len=1344, n/ep=0, n/st=800, pursuer_0/loss=4.001, pursuer_1/loss=3.965, pursuer_2/loss=4.305, pursuer_3/loss=4.344, pursuer_4/loss=4.531, pursuer_5/loss=4.646, pursuer_6/loss=4.600, pursuer_7/loss=4.464, rew=446.55]                           


Epoch #90: test_reward: 589.025375 ± 105.059734, best_reward: 661.870625 ± 126.197348 in #37


Epoch #91: 20001it [01:23, 240.76it/s, env_step=1820000, len=1168, n/ep=0, n/st=800, pursuer_0/loss=4.057, pursuer_1/loss=4.032, pursuer_2/loss=4.315, pursuer_3/loss=4.474, pursuer_4/loss=4.370, pursuer_5/loss=4.602, pursuer_6/loss=4.598, pursuer_7/loss=4.879, rew=529.58]                           


Epoch #91: test_reward: 542.654312 ± 89.299135, best_reward: 661.870625 ± 126.197348 in #37


Epoch #92: 20001it [01:29, 223.36it/s, env_step=1840000, len=1464, n/ep=0, n/st=800, pursuer_0/loss=3.814, pursuer_1/loss=3.886, pursuer_2/loss=4.159, pursuer_3/loss=4.413, pursuer_4/loss=4.395, pursuer_5/loss=4.445, pursuer_6/loss=4.436, pursuer_7/loss=4.507, rew=500.29]                           


Epoch #92: test_reward: 523.604437 ± 101.120851, best_reward: 661.870625 ± 126.197348 in #37


Epoch #93: 20001it [01:31, 219.50it/s, env_step=1860000, len=952, n/ep=0, n/st=800, pursuer_0/loss=3.759, pursuer_1/loss=3.658, pursuer_2/loss=4.114, pursuer_3/loss=4.142, pursuer_4/loss=4.086, pursuer_5/loss=4.394, pursuer_6/loss=4.295, pursuer_7/loss=4.506, rew=415.25]                            


Epoch #93: test_reward: 547.364375 ± 108.199753, best_reward: 661.870625 ± 126.197348 in #37


Epoch #94: 20001it [01:30, 221.72it/s, env_step=1880000, len=1232, n/ep=0, n/st=800, pursuer_0/loss=4.066, pursuer_1/loss=3.930, pursuer_2/loss=4.395, pursuer_3/loss=4.478, pursuer_4/loss=4.450, pursuer_5/loss=4.395, pursuer_6/loss=4.336, pursuer_7/loss=4.660, rew=378.39]                           


Epoch #94: test_reward: 605.323500 ± 123.690734, best_reward: 661.870625 ± 126.197348 in #37


Epoch #95: 20001it [01:31, 218.42it/s, env_step=1900000, len=1472, n/ep=0, n/st=800, pursuer_0/loss=3.920, pursuer_1/loss=3.795, pursuer_2/loss=4.179, pursuer_3/loss=4.300, pursuer_4/loss=4.207, pursuer_5/loss=4.382, pursuer_6/loss=4.283, pursuer_7/loss=4.699, rew=475.62]                           


Steps Policy Saved  1180
Epoch #95: test_reward: 563.635375 ± 114.761533, best_reward: 661.870625 ± 126.197348 in #37


Epoch #96: 20001it [01:24, 237.41it/s, env_step=1920000, len=936, n/ep=0, n/st=800, pursuer_0/loss=3.775, pursuer_1/loss=3.796, pursuer_2/loss=4.150, pursuer_3/loss=4.167, pursuer_4/loss=4.318, pursuer_5/loss=4.350, pursuer_6/loss=4.215, pursuer_7/loss=4.473, rew=586.12]                            


Epoch #96: test_reward: 573.391875 ± 97.340777, best_reward: 661.870625 ± 126.197348 in #37


Epoch #97: 20001it [01:22, 242.91it/s, env_step=1940000, len=960, n/ep=1, n/st=800, pursuer_0/loss=3.661, pursuer_1/loss=3.642, pursuer_2/loss=4.025, pursuer_3/loss=4.142, pursuer_4/loss=4.112, pursuer_5/loss=4.054, pursuer_6/loss=4.180, pursuer_7/loss=4.375, rew=463.74]                            


Epoch #97: test_reward: 591.092250 ± 118.841280, best_reward: 661.870625 ± 126.197348 in #37


Epoch #98: 20001it [01:34, 212.63it/s, env_step=1960000, len=1364, n/ep=2, n/st=800, pursuer_0/loss=4.076, pursuer_1/loss=3.940, pursuer_2/loss=4.311, pursuer_3/loss=4.351, pursuer_4/loss=4.227, pursuer_5/loss=4.462, pursuer_6/loss=4.542, pursuer_7/loss=4.716, rew=644.39]                           


Epoch #98: test_reward: 609.833875 ± 107.692318, best_reward: 661.870625 ± 126.197348 in #37


Epoch #99: 20001it [01:33, 215.04it/s, env_step=1980000, len=1068, n/ep=2, n/st=800, pursuer_0/loss=3.904, pursuer_1/loss=3.913, pursuer_2/loss=4.375, pursuer_3/loss=4.376, pursuer_4/loss=4.261, pursuer_5/loss=4.351, pursuer_6/loss=4.456, pursuer_7/loss=4.640, rew=461.26]                           


Epoch #99: test_reward: 574.525000 ± 123.936270, best_reward: 661.870625 ± 126.197348 in #37


Epoch #100: 20001it [01:29, 222.61it/s, env_step=2000000, len=984, n/ep=2, n/st=800, pursuer_0/loss=3.840, pursuer_1/loss=3.849, pursuer_2/loss=4.024, pursuer_3/loss=4.139, pursuer_4/loss=4.162, pursuer_5/loss=4.338, pursuer_6/loss=4.369, pursuer_7/loss=4.448, rew=559.26]                            


Epoch #100: test_reward: 526.512812 ± 108.492880, best_reward: 661.870625 ± 126.197348 in #37


Epoch #101: 20001it [01:29, 223.90it/s, env_step=2020000, len=1104, n/ep=0, n/st=800, pursuer_0/loss=3.781, pursuer_1/loss=3.948, pursuer_2/loss=4.247, pursuer_3/loss=4.323, pursuer_4/loss=4.319, pursuer_5/loss=4.296, pursuer_6/loss=4.423, pursuer_7/loss=4.683, rew=429.75]                           


Epoch #101: test_reward: 585.818437 ± 110.782112, best_reward: 661.870625 ± 126.197348 in #37


Epoch #102: 20001it [01:28, 226.11it/s, env_step=2040000, len=1264, n/ep=0, n/st=800, pursuer_0/loss=4.024, pursuer_1/loss=3.914, pursuer_2/loss=4.262, pursuer_3/loss=4.347, pursuer_4/loss=4.454, pursuer_5/loss=4.390, pursuer_6/loss=4.409, pursuer_7/loss=4.579, rew=464.41]                           


Epoch #102: test_reward: 542.573875 ± 88.650517, best_reward: 661.870625 ± 126.197348 in #37


Epoch #103: 20001it [01:24, 237.26it/s, env_step=2060000, len=1032, n/ep=0, n/st=800, pursuer_0/loss=3.913, pursuer_1/loss=3.891, pursuer_2/loss=4.299, pursuer_3/loss=4.431, pursuer_4/loss=4.328, pursuer_5/loss=4.560, pursuer_6/loss=4.544, pursuer_7/loss=4.929, rew=601.24]                           


Epoch #103: test_reward: 583.609812 ± 125.703081, best_reward: 661.870625 ± 126.197348 in #37


Epoch #104: 20001it [01:25, 233.89it/s, env_step=2080000, len=1368, n/ep=1, n/st=800, pursuer_0/loss=3.947, pursuer_1/loss=3.863, pursuer_2/loss=4.242, pursuer_3/loss=4.404, pursuer_4/loss=4.273, pursuer_5/loss=4.360, pursuer_6/loss=4.466, pursuer_7/loss=4.704, rew=365.35]                           


Epoch #104: test_reward: 524.062000 ± 75.429352, best_reward: 661.870625 ± 126.197348 in #37


Epoch #105: 20001it [01:31, 219.78it/s, env_step=2100000, len=1512, n/ep=0, n/st=800, pursuer_0/loss=4.010, pursuer_1/loss=3.925, pursuer_2/loss=4.256, pursuer_3/loss=4.463, pursuer_4/loss=4.339, pursuer_5/loss=4.529, pursuer_6/loss=4.476, pursuer_7/loss=4.616, rew=620.19]                           


Epoch #105: test_reward: 554.957625 ± 112.296943, best_reward: 661.870625 ± 126.197348 in #37


Epoch #106: 20001it [01:29, 222.71it/s, env_step=2120000, len=1248, n/ep=0, n/st=800, pursuer_0/loss=3.873, pursuer_1/loss=3.944, pursuer_2/loss=4.262, pursuer_3/loss=4.410, pursuer_4/loss=4.333, pursuer_5/loss=4.403, pursuer_6/loss=4.585, pursuer_7/loss=4.786, rew=525.92]                           


Epoch #106: test_reward: 557.219437 ± 78.608520, best_reward: 661.870625 ± 126.197348 in #37


Epoch #107: 20001it [01:22, 242.45it/s, env_step=2140000, len=1248, n/ep=1, n/st=800, pursuer_0/loss=3.953, pursuer_1/loss=4.054, pursuer_2/loss=4.310, pursuer_3/loss=4.346, pursuer_4/loss=4.236, pursuer_5/loss=4.470, pursuer_6/loss=4.511, pursuer_7/loss=4.671, rew=567.60]                           


Epoch #107: test_reward: 573.161000 ± 94.372172, best_reward: 661.870625 ± 126.197348 in #37


Epoch #108: 20001it [01:20, 249.43it/s, env_step=2160000, len=1168, n/ep=0, n/st=800, pursuer_0/loss=3.907, pursuer_1/loss=3.911, pursuer_2/loss=4.299, pursuer_3/loss=4.248, pursuer_4/loss=4.372, pursuer_5/loss=4.442, pursuer_6/loss=4.650, pursuer_7/loss=4.664, rew=459.73]                           


Epoch #108: test_reward: 553.117937 ± 118.633477, best_reward: 661.870625 ± 126.197348 in #37


Epoch #109: 20001it [01:19, 250.50it/s, env_step=2180000, len=1220, n/ep=0, n/st=800, pursuer_0/loss=3.848, pursuer_1/loss=3.967, pursuer_2/loss=4.337, pursuer_3/loss=4.334, pursuer_4/loss=4.307, pursuer_5/loss=4.393, pursuer_6/loss=4.736, pursuer_7/loss=4.578, rew=479.29]                           


Epoch #109: test_reward: 573.641875 ± 98.581677, best_reward: 661.870625 ± 126.197348 in #37


Epoch #110: 20001it [01:19, 250.27it/s, env_step=2200000, len=1240, n/ep=0, n/st=800, pursuer_0/loss=3.880, pursuer_1/loss=3.857, pursuer_2/loss=4.305, pursuer_3/loss=4.143, pursuer_4/loss=4.249, pursuer_5/loss=4.514, pursuer_6/loss=4.590, pursuer_7/loss=4.682, rew=487.18]                           


Epoch #110: test_reward: 565.446937 ± 103.574315, best_reward: 661.870625 ± 126.197348 in #37


Epoch #111: 20001it [01:19, 250.45it/s, env_step=2220000, len=1128, n/ep=1, n/st=800, pursuer_0/loss=3.988, pursuer_1/loss=3.988, pursuer_2/loss=4.395, pursuer_3/loss=4.588, pursuer_4/loss=4.415, pursuer_5/loss=4.358, pursuer_6/loss=4.652, pursuer_7/loss=4.827, rew=463.50]                           


Epoch #111: test_reward: 637.384062 ± 71.578694, best_reward: 661.870625 ± 126.197348 in #37


Epoch #112: 20001it [01:24, 236.28it/s, env_step=2240000, len=981, n/ep=3, n/st=800, pursuer_0/loss=3.916, pursuer_1/loss=3.982, pursuer_2/loss=4.415, pursuer_3/loss=4.349, pursuer_4/loss=4.493, pursuer_5/loss=4.478, pursuer_6/loss=4.651, pursuer_7/loss=4.902, rew=519.34]                            


Epoch #112: test_reward: 551.973687 ± 83.326774, best_reward: 661.870625 ± 126.197348 in #37


Epoch #113: 20001it [01:23, 240.60it/s, env_step=2260000, len=1237, n/ep=0, n/st=800, pursuer_0/loss=3.853, pursuer_1/loss=3.960, pursuer_2/loss=4.401, pursuer_3/loss=4.468, pursuer_4/loss=4.352, pursuer_5/loss=4.391, pursuer_6/loss=4.578, pursuer_7/loss=4.654, rew=627.30]                           


Steps Policy Saved  1420
Epoch #113: test_reward: 550.913125 ± 68.680395, best_reward: 661.870625 ± 126.197348 in #37


Epoch #114: 20001it [01:24, 235.44it/s, env_step=2280000, len=1224, n/ep=1, n/st=800, pursuer_0/loss=3.755, pursuer_1/loss=3.808, pursuer_2/loss=4.342, pursuer_3/loss=4.194, pursuer_4/loss=4.419, pursuer_5/loss=4.379, pursuer_6/loss=4.401, pursuer_7/loss=4.472, rew=593.86]                           


Epoch #114: test_reward: 544.605125 ± 98.015265, best_reward: 661.870625 ± 126.197348 in #37


Epoch #115: 20001it [01:31, 217.76it/s, env_step=2300000, len=1608, n/ep=1, n/st=800, pursuer_0/loss=3.698, pursuer_1/loss=3.862, pursuer_2/loss=4.369, pursuer_3/loss=4.265, pursuer_4/loss=4.439, pursuer_5/loss=4.298, pursuer_6/loss=4.488, pursuer_7/loss=4.594, rew=591.82]                           


Epoch #115: test_reward: 560.468625 ± 99.959255, best_reward: 661.870625 ± 126.197348 in #37


Epoch #116: 20001it [01:27, 228.41it/s, env_step=2320000, len=1160, n/ep=0, n/st=800, pursuer_0/loss=3.737, pursuer_1/loss=3.639, pursuer_2/loss=4.248, pursuer_3/loss=4.330, pursuer_4/loss=4.378, pursuer_5/loss=4.453, pursuer_6/loss=4.499, pursuer_7/loss=4.626, rew=754.75]                           


Epoch #116: test_reward: 556.290750 ± 105.468179, best_reward: 661.870625 ± 126.197348 in #37


Epoch #117: 20001it [01:23, 239.95it/s, env_step=2340000, len=1424, n/ep=0, n/st=800, pursuer_0/loss=3.657, pursuer_1/loss=3.618, pursuer_2/loss=4.210, pursuer_3/loss=4.254, pursuer_4/loss=4.161, pursuer_5/loss=4.155, pursuer_6/loss=4.406, pursuer_7/loss=4.335, rew=557.91]                           


Epoch #117: test_reward: 542.571937 ± 76.828778, best_reward: 661.870625 ± 126.197348 in #37


Epoch #118: 20001it [01:24, 237.87it/s, env_step=2360000, len=1496, n/ep=0, n/st=800, pursuer_0/loss=3.761, pursuer_1/loss=3.773, pursuer_2/loss=4.358, pursuer_3/loss=4.248, pursuer_4/loss=4.244, pursuer_5/loss=4.356, pursuer_6/loss=4.590, pursuer_7/loss=4.406, rew=647.22]                           


Epoch #118: test_reward: 589.090625 ± 91.512218, best_reward: 661.870625 ± 126.197348 in #37


Epoch #119: 20001it [01:31, 218.43it/s, env_step=2380000, len=1288, n/ep=0, n/st=800, pursuer_0/loss=3.741, pursuer_1/loss=3.864, pursuer_2/loss=4.260, pursuer_3/loss=4.086, pursuer_4/loss=4.326, pursuer_5/loss=4.546, pursuer_6/loss=4.587, pursuer_7/loss=4.583, rew=560.06]                           


Epoch #119: test_reward: 560.043187 ± 116.490125, best_reward: 661.870625 ± 126.197348 in #37


Epoch #120: 20001it [01:31, 218.85it/s, env_step=2400000, len=1160, n/ep=2, n/st=800, pursuer_0/loss=3.789, pursuer_1/loss=3.951, pursuer_2/loss=4.416, pursuer_3/loss=4.312, pursuer_4/loss=4.508, pursuer_5/loss=4.534, pursuer_6/loss=4.618, pursuer_7/loss=4.537, rew=485.29]                           


Epoch #120: test_reward: 543.510187 ± 100.295749, best_reward: 661.870625 ± 126.197348 in #37


Epoch #121: 20001it [01:24, 235.93it/s, env_step=2420000, len=1144, n/ep=0, n/st=800, pursuer_0/loss=3.726, pursuer_1/loss=3.878, pursuer_2/loss=4.319, pursuer_3/loss=4.205, pursuer_4/loss=4.409, pursuer_5/loss=4.164, pursuer_6/loss=4.535, pursuer_7/loss=4.565, rew=403.62]                           


Steps Policy Saved  1520
Epoch #121: test_reward: 576.893875 ± 102.444719, best_reward: 661.870625 ± 126.197348 in #37


Epoch #122: 20001it [01:25, 232.73it/s, env_step=2440000, len=1008, n/ep=1, n/st=800, pursuer_0/loss=3.720, pursuer_1/loss=3.886, pursuer_2/loss=4.247, pursuer_3/loss=4.266, pursuer_4/loss=4.353, pursuer_5/loss=4.310, pursuer_6/loss=4.529, pursuer_7/loss=4.523, rew=700.02]                           


Epoch #122: test_reward: 605.208625 ± 108.614347, best_reward: 661.870625 ± 126.197348 in #37


Epoch #123: 20001it [01:28, 227.15it/s, env_step=2460000, len=1224, n/ep=1, n/st=800, pursuer_0/loss=3.883, pursuer_1/loss=3.942, pursuer_2/loss=4.414, pursuer_3/loss=4.490, pursuer_4/loss=4.495, pursuer_5/loss=4.343, pursuer_6/loss=4.475, pursuer_7/loss=4.637, rew=643.65]                           


Epoch #123: test_reward: 549.150750 ± 103.354301, best_reward: 661.870625 ± 126.197348 in #37


Epoch #124: 20001it [01:29, 224.52it/s, env_step=2480000, len=964, n/ep=0, n/st=800, pursuer_0/loss=3.758, pursuer_1/loss=3.735, pursuer_2/loss=4.218, pursuer_3/loss=4.265, pursuer_4/loss=4.357, pursuer_5/loss=4.318, pursuer_6/loss=4.448, pursuer_7/loss=4.535, rew=515.98]                            


Epoch #124: test_reward: 595.387000 ± 90.781994, best_reward: 661.870625 ± 126.197348 in #37


Epoch #125: 20001it [01:30, 220.28it/s, env_step=2500000, len=1408, n/ep=2, n/st=800, pursuer_0/loss=3.827, pursuer_1/loss=4.053, pursuer_2/loss=4.327, pursuer_3/loss=4.256, pursuer_4/loss=4.283, pursuer_5/loss=4.339, pursuer_6/loss=4.511, pursuer_7/loss=4.472, rew=498.80]                           


Epoch #125: test_reward: 545.314812 ± 89.125319, best_reward: 661.870625 ± 126.197348 in #37


Epoch #126: 20001it [01:31, 217.52it/s, env_step=2520000, len=1200, n/ep=0, n/st=800, pursuer_0/loss=3.881, pursuer_1/loss=3.933, pursuer_2/loss=4.408, pursuer_3/loss=4.231, pursuer_4/loss=4.388, pursuer_5/loss=4.410, pursuer_6/loss=4.604, pursuer_7/loss=4.651, rew=597.83]                           


Epoch #126: test_reward: 570.526500 ± 91.872442, best_reward: 661.870625 ± 126.197348 in #37


Epoch #127: 20001it [01:28, 225.28it/s, env_step=2540000, len=840, n/ep=0, n/st=800, pursuer_0/loss=3.796, pursuer_1/loss=3.887, pursuer_2/loss=4.323, pursuer_3/loss=4.291, pursuer_4/loss=4.425, pursuer_5/loss=4.377, pursuer_6/loss=4.553, pursuer_7/loss=4.474, rew=436.33]                            


Steps Policy Saved  1600
Epoch #127: test_reward: 613.952250 ± 106.560210, best_reward: 661.870625 ± 126.197348 in #37


Epoch #128: 20001it [01:25, 234.34it/s, env_step=2560000, len=1336, n/ep=1, n/st=800, pursuer_0/loss=3.732, pursuer_1/loss=3.851, pursuer_2/loss=4.436, pursuer_3/loss=4.227, pursuer_4/loss=4.384, pursuer_5/loss=4.426, pursuer_6/loss=4.415, pursuer_7/loss=4.502, rew=540.35]                           


Epoch #128: test_reward: 546.774750 ± 96.885798, best_reward: 661.870625 ± 126.197348 in #37


Epoch #129: 20001it [01:33, 213.13it/s, env_step=2580000, len=1200, n/ep=0, n/st=800, pursuer_0/loss=3.909, pursuer_1/loss=3.926, pursuer_2/loss=4.527, pursuer_3/loss=4.272, pursuer_4/loss=4.397, pursuer_5/loss=4.610, pursuer_6/loss=4.526, pursuer_7/loss=4.698, rew=450.32]                           


Epoch #129: test_reward: 578.460750 ± 89.649084, best_reward: 661.870625 ± 126.197348 in #37


Epoch #130: 20001it [01:28, 226.44it/s, env_step=2600000, len=1368, n/ep=0, n/st=800, pursuer_0/loss=3.996, pursuer_1/loss=3.992, pursuer_2/loss=4.469, pursuer_3/loss=4.440, pursuer_4/loss=4.552, pursuer_5/loss=4.594, pursuer_6/loss=4.667, pursuer_7/loss=4.937, rew=599.87]                           


Steps Policy Saved  1640
Epoch #130: test_reward: 552.566687 ± 89.645212, best_reward: 661.870625 ± 126.197348 in #37


Epoch #131: 20001it [01:28, 226.06it/s, env_step=2620000, len=1552, n/ep=1, n/st=800, pursuer_0/loss=4.135, pursuer_1/loss=4.072, pursuer_2/loss=4.633, pursuer_3/loss=4.265, pursuer_4/loss=4.624, pursuer_5/loss=4.632, pursuer_6/loss=4.712, pursuer_7/loss=4.733, rew=536.12]                           


Epoch #131: test_reward: 584.013937 ± 108.834457, best_reward: 661.870625 ± 126.197348 in #37


Epoch #132: 20001it [01:31, 218.10it/s, env_step=2640000, len=1592, n/ep=0, n/st=800, pursuer_0/loss=4.146, pursuer_1/loss=4.127, pursuer_2/loss=4.521, pursuer_3/loss=4.442, pursuer_4/loss=4.769, pursuer_5/loss=4.596, pursuer_6/loss=4.671, pursuer_7/loss=4.953, rew=512.12]                           


Epoch #132: test_reward: 535.965687 ± 74.800220, best_reward: 661.870625 ± 126.197348 in #37


Epoch #133: 20001it [01:25, 233.95it/s, env_step=2660000, len=1008, n/ep=1, n/st=800, pursuer_0/loss=4.190, pursuer_1/loss=4.021, pursuer_2/loss=4.377, pursuer_3/loss=4.339, pursuer_4/loss=4.531, pursuer_5/loss=4.468, pursuer_6/loss=4.651, pursuer_7/loss=4.697, rew=705.14]                           


Epoch #133: test_reward: 577.731000 ± 117.461827, best_reward: 661.870625 ± 126.197348 in #37


Epoch #134: 20001it [01:26, 230.26it/s, env_step=2680000, len=1124, n/ep=2, n/st=800, pursuer_0/loss=3.951, pursuer_1/loss=3.856, pursuer_2/loss=4.400, pursuer_3/loss=4.378, pursuer_4/loss=4.411, pursuer_5/loss=4.475, pursuer_6/loss=4.704, pursuer_7/loss=4.579, rew=553.42]                           


Epoch #134: test_reward: 538.338062 ± 102.777044, best_reward: 661.870625 ± 126.197348 in #37


Epoch #135: 20001it [01:21, 246.46it/s, env_step=2700000, len=1264, n/ep=1, n/st=800, pursuer_0/loss=4.319, pursuer_1/loss=4.258, pursuer_2/loss=4.492, pursuer_3/loss=4.502, pursuer_4/loss=4.564, pursuer_5/loss=4.746, pursuer_6/loss=4.905, pursuer_7/loss=5.004, rew=600.00]                           


Epoch #135: test_reward: 549.530125 ± 113.836488, best_reward: 661.870625 ± 126.197348 in #37


Epoch #136: 20001it [01:28, 225.75it/s, env_step=2720000, len=1144, n/ep=1, n/st=800, pursuer_0/loss=3.975, pursuer_1/loss=3.873, pursuer_2/loss=4.428, pursuer_3/loss=4.493, pursuer_4/loss=4.379, pursuer_5/loss=4.582, pursuer_6/loss=4.656, pursuer_7/loss=4.742, rew=681.13]                           


Epoch #136: test_reward: 553.344500 ± 96.529420, best_reward: 661.870625 ± 126.197348 in #37


Epoch #137: 20001it [01:28, 225.43it/s, env_step=2740000, len=1396, n/ep=2, n/st=800, pursuer_0/loss=3.933, pursuer_1/loss=3.927, pursuer_2/loss=4.281, pursuer_3/loss=4.448, pursuer_4/loss=4.354, pursuer_5/loss=4.483, pursuer_6/loss=4.622, pursuer_7/loss=4.657, rew=561.94]                           


Epoch #137: test_reward: 606.909500 ± 89.422273, best_reward: 661.870625 ± 126.197348 in #37


Epoch #138: 20001it [01:30, 219.86it/s, env_step=2760000, len=1272, n/ep=1, n/st=800, pursuer_0/loss=4.091, pursuer_1/loss=3.944, pursuer_2/loss=4.413, pursuer_3/loss=4.321, pursuer_4/loss=4.486, pursuer_5/loss=4.647, pursuer_6/loss=4.688, pursuer_7/loss=4.754, rew=474.92]                           


Epoch #138: test_reward: 587.040437 ± 111.506533, best_reward: 661.870625 ± 126.197348 in #37


Epoch #139: 20001it [01:21, 245.96it/s, env_step=2780000, len=1312, n/ep=1, n/st=800, pursuer_0/loss=3.981, pursuer_1/loss=3.949, pursuer_2/loss=4.238, pursuer_3/loss=4.428, pursuer_4/loss=4.410, pursuer_5/loss=4.579, pursuer_6/loss=4.668, pursuer_7/loss=4.674, rew=714.17]                           


Epoch #139: test_reward: 564.083062 ± 111.182491, best_reward: 661.870625 ± 126.197348 in #37


Epoch #140: 20001it [01:22, 241.05it/s, env_step=2800000, len=1448, n/ep=0, n/st=800, pursuer_0/loss=4.104, pursuer_1/loss=4.021, pursuer_2/loss=4.416, pursuer_3/loss=4.408, pursuer_4/loss=4.581, pursuer_5/loss=4.586, pursuer_6/loss=4.763, pursuer_7/loss=4.700, rew=471.60]                           


Epoch #140: test_reward: 573.588937 ± 112.777460, best_reward: 661.870625 ± 126.197348 in #37


Epoch #141: 20001it [01:29, 223.92it/s, env_step=2820000, len=1144, n/ep=1, n/st=800, pursuer_0/loss=4.140, pursuer_1/loss=4.029, pursuer_2/loss=4.313, pursuer_3/loss=4.450, pursuer_4/loss=4.464, pursuer_5/loss=4.588, pursuer_6/loss=4.627, pursuer_7/loss=4.736, rew=672.21]                           


Epoch #141: test_reward: 592.792812 ± 98.230097, best_reward: 661.870625 ± 126.197348 in #37


Epoch #142: 20001it [01:20, 247.98it/s, env_step=2840000, len=1176, n/ep=2, n/st=800, pursuer_0/loss=4.057, pursuer_1/loss=4.076, pursuer_2/loss=4.589, pursuer_3/loss=4.506, pursuer_4/loss=4.422, pursuer_5/loss=4.721, pursuer_6/loss=4.872, pursuer_7/loss=4.919, rew=563.07]                           


Epoch #142: test_reward: 606.535062 ± 109.400443, best_reward: 661.870625 ± 126.197348 in #37


Epoch #143: 20001it [01:28, 227.02it/s, env_step=2860000, len=1056, n/ep=0, n/st=800, pursuer_0/loss=4.173, pursuer_1/loss=4.257, pursuer_2/loss=4.314, pursuer_3/loss=4.494, pursuer_4/loss=4.541, pursuer_5/loss=4.699, pursuer_6/loss=4.733, pursuer_7/loss=4.988, rew=505.19]                           


Epoch #143: test_reward: 560.165875 ± 102.106689, best_reward: 661.870625 ± 126.197348 in #37


Epoch #144: 20001it [01:27, 228.64it/s, env_step=2880000, len=1176, n/ep=1, n/st=800, pursuer_0/loss=4.141, pursuer_1/loss=4.071, pursuer_2/loss=4.401, pursuer_3/loss=4.411, pursuer_4/loss=4.414, pursuer_5/loss=4.572, pursuer_6/loss=4.777, pursuer_7/loss=4.889, rew=538.57]                           


Steps Policy Saved  1830
Epoch #144: test_reward: 546.708500 ± 84.968389, best_reward: 661.870625 ± 126.197348 in #37


Epoch #145: 20001it [01:25, 233.19it/s, env_step=2900000, len=968, n/ep=0, n/st=800, pursuer_0/loss=4.214, pursuer_1/loss=4.315, pursuer_2/loss=4.700, pursuer_3/loss=4.545, pursuer_4/loss=4.669, pursuer_5/loss=4.872, pursuer_6/loss=4.911, pursuer_7/loss=5.097, rew=538.82]                            


Epoch #145: test_reward: 541.017250 ± 101.929610, best_reward: 661.870625 ± 126.197348 in #37


Epoch #146: 20001it [01:19, 250.61it/s, env_step=2920000, len=1324, n/ep=0, n/st=800, pursuer_0/loss=4.001, pursuer_1/loss=4.065, pursuer_2/loss=4.590, pursuer_3/loss=4.393, pursuer_4/loss=4.401, pursuer_5/loss=4.574, pursuer_6/loss=4.776, pursuer_7/loss=4.976, rew=527.21]                           


Epoch #146: test_reward: 567.261812 ± 72.720496, best_reward: 661.870625 ± 126.197348 in #37


Epoch #147: 20001it [01:21, 244.23it/s, env_step=2940000, len=1396, n/ep=0, n/st=800, pursuer_0/loss=4.154, pursuer_1/loss=4.348, pursuer_2/loss=4.816, pursuer_3/loss=4.678, pursuer_4/loss=4.677, pursuer_5/loss=4.799, pursuer_6/loss=5.024, pursuer_7/loss=5.147, rew=706.00]                           


Epoch #147: test_reward: 582.181250 ± 124.603697, best_reward: 661.870625 ± 126.197348 in #37


Epoch #148: 20001it [01:28, 226.17it/s, env_step=2960000, len=1448, n/ep=1, n/st=800, pursuer_0/loss=4.116, pursuer_1/loss=4.285, pursuer_2/loss=4.692, pursuer_3/loss=4.634, pursuer_4/loss=4.426, pursuer_5/loss=4.731, pursuer_6/loss=4.911, pursuer_7/loss=5.028, rew=616.64]                           


Epoch #148: test_reward: 563.834750 ± 116.902623, best_reward: 661.870625 ± 126.197348 in #37


Epoch #149: 20001it [01:21, 246.69it/s, env_step=2980000, len=1112, n/ep=1, n/st=800, pursuer_0/loss=4.219, pursuer_1/loss=4.322, pursuer_2/loss=4.829, pursuer_3/loss=4.696, pursuer_4/loss=4.557, pursuer_5/loss=4.840, pursuer_6/loss=4.984, pursuer_7/loss=5.192, rew=454.01]                           


Epoch #149: test_reward: 537.578812 ± 77.727782, best_reward: 661.870625 ± 126.197348 in #37


Epoch #150: 20001it [01:21, 244.09it/s, env_step=3000000, len=1224, n/ep=1, n/st=800, pursuer_0/loss=4.353, pursuer_1/loss=4.438, pursuer_2/loss=4.676, pursuer_3/loss=4.756, pursuer_4/loss=4.793, pursuer_5/loss=5.049, pursuer_6/loss=5.198, pursuer_7/loss=5.254, rew=649.41]                           


Epoch #150: test_reward: 566.545312 ± 93.019741, best_reward: 661.870625 ± 126.197348 in #37


Epoch #151: 20001it [01:22, 242.82it/s, env_step=3020000, len=952, n/ep=1, n/st=800, pursuer_0/loss=4.112, pursuer_1/loss=4.227, pursuer_2/loss=4.584, pursuer_3/loss=4.487, pursuer_4/loss=4.579, pursuer_5/loss=4.780, pursuer_6/loss=5.027, pursuer_7/loss=5.161, rew=570.53]                            


Epoch #151: test_reward: 577.333937 ± 125.276472, best_reward: 661.870625 ± 126.197348 in #37


Epoch #152: 20001it [01:20, 247.09it/s, env_step=3040000, len=888, n/ep=0, n/st=800, pursuer_0/loss=4.154, pursuer_1/loss=4.323, pursuer_2/loss=4.608, pursuer_3/loss=4.580, pursuer_4/loss=4.696, pursuer_5/loss=4.815, pursuer_6/loss=4.917, pursuer_7/loss=4.913, rew=450.92]                            


Epoch #152: test_reward: 534.875063 ± 119.949788, best_reward: 661.870625 ± 126.197348 in #37


Epoch #153: 20001it [01:21, 246.63it/s, env_step=3060000, len=980, n/ep=0, n/st=800, pursuer_0/loss=4.156, pursuer_1/loss=4.336, pursuer_2/loss=4.737, pursuer_3/loss=4.677, pursuer_4/loss=4.805, pursuer_5/loss=4.792, pursuer_6/loss=4.994, pursuer_7/loss=5.101, rew=494.33]                            


Epoch #153: test_reward: 532.059937 ± 88.087441, best_reward: 661.870625 ± 126.197348 in #37


Epoch #154: 20001it [01:29, 223.50it/s, env_step=3080000, len=1232, n/ep=1, n/st=800, pursuer_0/loss=3.996, pursuer_1/loss=4.208, pursuer_2/loss=4.640, pursuer_3/loss=4.615, pursuer_4/loss=4.454, pursuer_5/loss=4.619, pursuer_6/loss=4.794, pursuer_7/loss=4.840, rew=345.45]                           


Epoch #154: test_reward: 569.643625 ± 111.098620, best_reward: 661.870625 ± 126.197348 in #37


Epoch #155: 20001it [01:25, 234.47it/s, env_step=3100000, len=1220, n/ep=0, n/st=800, pursuer_0/loss=4.112, pursuer_1/loss=4.283, pursuer_2/loss=4.692, pursuer_3/loss=4.628, pursuer_4/loss=4.554, pursuer_5/loss=4.864, pursuer_6/loss=4.942, pursuer_7/loss=4.887, rew=707.36]                           


Steps Policy Saved  1980
Epoch #155: test_reward: 601.583500 ± 102.601028, best_reward: 661.870625 ± 126.197348 in #37


Epoch #156: 20001it [01:25, 234.98it/s, env_step=3120000, len=1157, n/ep=0, n/st=800, pursuer_0/loss=4.117, pursuer_1/loss=4.153, pursuer_2/loss=4.539, pursuer_3/loss=4.649, pursuer_4/loss=4.554, pursuer_5/loss=4.677, pursuer_6/loss=4.835, pursuer_7/loss=5.025, rew=461.85]                           


Epoch #156: test_reward: 568.227937 ± 106.166508, best_reward: 661.870625 ± 126.197348 in #37


Epoch #157: 20001it [01:22, 241.72it/s, env_step=3140000, len=912, n/ep=0, n/st=800, pursuer_0/loss=4.030, pursuer_1/loss=4.246, pursuer_2/loss=4.603, pursuer_3/loss=4.526, pursuer_4/loss=4.500, pursuer_5/loss=4.536, pursuer_6/loss=4.836, pursuer_7/loss=4.777, rew=427.96]                            


Epoch #157: test_reward: 536.245937 ± 91.918078, best_reward: 661.870625 ± 126.197348 in #37


Epoch #158: 20001it [01:22, 241.40it/s, env_step=3160000, len=1544, n/ep=0, n/st=800, pursuer_0/loss=3.890, pursuer_1/loss=4.078, pursuer_2/loss=4.511, pursuer_3/loss=4.607, pursuer_4/loss=4.356, pursuer_5/loss=4.564, pursuer_6/loss=4.863, pursuer_7/loss=4.740, rew=562.43]                           


Epoch #158: test_reward: 576.167312 ± 101.229008, best_reward: 661.870625 ± 126.197348 in #37


Epoch #159: 20001it [01:26, 231.61it/s, env_step=3180000, len=1040, n/ep=3, n/st=800, pursuer_0/loss=3.901, pursuer_1/loss=4.186, pursuer_2/loss=4.513, pursuer_3/loss=4.674, pursuer_4/loss=4.677, pursuer_5/loss=4.741, pursuer_6/loss=4.823, pursuer_7/loss=4.832, rew=516.11]                           


Epoch #159: test_reward: 588.862375 ± 95.595349, best_reward: 661.870625 ± 126.197348 in #37


Epoch #160: 20001it [01:25, 233.36it/s, env_step=3200000, len=896, n/ep=0, n/st=800, pursuer_0/loss=3.869, pursuer_1/loss=4.011, pursuer_2/loss=4.545, pursuer_3/loss=4.529, pursuer_4/loss=4.461, pursuer_5/loss=4.714, pursuer_6/loss=4.754, pursuer_7/loss=4.781, rew=494.92]                            


Epoch #160: test_reward: 558.428125 ± 103.727465, best_reward: 661.870625 ± 126.197348 in #37


Epoch #161: 20001it [01:23, 240.90it/s, env_step=3220000, len=1312, n/ep=1, n/st=800, pursuer_0/loss=3.860, pursuer_1/loss=3.964, pursuer_2/loss=4.327, pursuer_3/loss=4.454, pursuer_4/loss=4.435, pursuer_5/loss=4.719, pursuer_6/loss=4.746, pursuer_7/loss=4.790, rew=583.31]                           


Epoch #161: test_reward: 531.121500 ± 92.971581, best_reward: 661.870625 ± 126.197348 in #37


Epoch #162: 20001it [01:23, 238.97it/s, env_step=3240000, len=1232, n/ep=2, n/st=800, pursuer_0/loss=3.843, pursuer_1/loss=3.904, pursuer_2/loss=4.472, pursuer_3/loss=4.378, pursuer_4/loss=4.367, pursuer_5/loss=4.742, pursuer_6/loss=4.688, pursuer_7/loss=4.716, rew=602.95]                           


Epoch #162: test_reward: 530.334000 ± 113.255998, best_reward: 661.870625 ± 126.197348 in #37


Epoch #163: 20001it [01:27, 229.33it/s, env_step=3260000, len=1328, n/ep=0, n/st=800, pursuer_0/loss=4.211, pursuer_1/loss=4.330, pursuer_2/loss=4.566, pursuer_3/loss=4.799, pursuer_4/loss=4.836, pursuer_5/loss=4.869, pursuer_6/loss=5.050, pursuer_7/loss=5.053, rew=419.80]                           


Epoch #163: test_reward: 604.121187 ± 83.040602, best_reward: 661.870625 ± 126.197348 in #37


Epoch #164: 20001it [01:29, 222.95it/s, env_step=3280000, len=978, n/ep=0, n/st=800, pursuer_0/loss=4.383, pursuer_1/loss=4.311, pursuer_2/loss=4.851, pursuer_3/loss=4.831, pursuer_4/loss=4.915, pursuer_5/loss=5.068, pursuer_6/loss=5.077, pursuer_7/loss=5.082, rew=549.30]                            


Epoch #164: test_reward: 540.195250 ± 99.883787, best_reward: 661.870625 ± 126.197348 in #37


Epoch #165: 20001it [01:26, 230.63it/s, env_step=3300000, len=1404, n/ep=2, n/st=800, pursuer_0/loss=4.025, pursuer_1/loss=3.910, pursuer_2/loss=4.584, pursuer_3/loss=4.502, pursuer_4/loss=4.651, pursuer_5/loss=4.723, pursuer_6/loss=4.896, pursuer_7/loss=4.782, rew=424.37]                           


Epoch #165: test_reward: 621.520750 ± 117.795782, best_reward: 661.870625 ± 126.197348 in #37


Epoch #166: 20001it [01:30, 220.59it/s, env_step=3320000, len=936, n/ep=0, n/st=800, pursuer_0/loss=4.077, pursuer_1/loss=4.096, pursuer_2/loss=4.533, pursuer_3/loss=4.664, pursuer_4/loss=4.588, pursuer_5/loss=4.709, pursuer_6/loss=4.863, pursuer_7/loss=4.697, rew=372.43]                            


Epoch #166: test_reward: 584.109250 ± 96.836435, best_reward: 661.870625 ± 126.197348 in #37


Epoch #167: 20001it [01:27, 228.80it/s, env_step=3340000, len=1236, n/ep=2, n/st=800, pursuer_0/loss=4.069, pursuer_1/loss=4.096, pursuer_2/loss=4.683, pursuer_3/loss=4.606, pursuer_4/loss=4.657, pursuer_5/loss=4.821, pursuer_6/loss=4.859, pursuer_7/loss=4.886, rew=490.38]                           


Steps Policy Saved  2140
Epoch #167: test_reward: 602.200812 ± 115.431624, best_reward: 661.870625 ± 126.197348 in #37


Epoch #168: 20001it [01:25, 232.79it/s, env_step=3360000, len=1192, n/ep=1, n/st=800, pursuer_0/loss=4.099, pursuer_1/loss=4.068, pursuer_2/loss=4.646, pursuer_3/loss=4.718, pursuer_4/loss=4.568, pursuer_5/loss=4.838, pursuer_6/loss=5.047, pursuer_7/loss=4.922, rew=498.82]                           


Epoch #168: test_reward: 571.804562 ± 100.565788, best_reward: 661.870625 ± 126.197348 in #37


Epoch #169: 20001it [01:26, 230.27it/s, env_step=3380000, len=1104, n/ep=1, n/st=800, pursuer_0/loss=4.065, pursuer_1/loss=4.142, pursuer_2/loss=4.620, pursuer_3/loss=4.716, pursuer_4/loss=4.467, pursuer_5/loss=5.051, pursuer_6/loss=4.842, pursuer_7/loss=4.900, rew=736.11]                           


Epoch #169: test_reward: 541.377625 ± 122.108177, best_reward: 661.870625 ± 126.197348 in #37


Epoch #170: 20001it [01:29, 223.35it/s, env_step=3400000, len=1056, n/ep=0, n/st=800, pursuer_0/loss=4.202, pursuer_1/loss=4.247, pursuer_2/loss=4.771, pursuer_3/loss=4.640, pursuer_4/loss=4.649, pursuer_5/loss=5.000, pursuer_6/loss=5.018, pursuer_7/loss=4.951, rew=763.92]                           


Epoch #170: test_reward: 540.729625 ± 62.640110, best_reward: 661.870625 ± 126.197348 in #37


Epoch #171: 20001it [01:30, 221.59it/s, env_step=3420000, len=888, n/ep=1, n/st=800, pursuer_0/loss=4.258, pursuer_1/loss=4.412, pursuer_2/loss=4.929, pursuer_3/loss=4.996, pursuer_4/loss=4.909, pursuer_5/loss=5.044, pursuer_6/loss=5.297, pursuer_7/loss=5.449, rew=439.90]                            


Epoch #171: test_reward: 586.406687 ± 108.364903, best_reward: 661.870625 ± 126.197348 in #37


Epoch #172: 20001it [01:31, 217.69it/s, env_step=3440000, len=1392, n/ep=1, n/st=800, pursuer_0/loss=4.198, pursuer_1/loss=4.316, pursuer_2/loss=4.800, pursuer_3/loss=4.720, pursuer_4/loss=4.843, pursuer_5/loss=4.924, pursuer_6/loss=5.086, pursuer_7/loss=5.283, rew=705.55]                           


Epoch #172: test_reward: 548.009187 ± 104.289325, best_reward: 661.870625 ± 126.197348 in #37


Epoch #173: 20001it [01:32, 215.84it/s, env_step=3460000, len=696, n/ep=1, n/st=800, pursuer_0/loss=4.142, pursuer_1/loss=4.293, pursuer_2/loss=4.839, pursuer_3/loss=4.767, pursuer_4/loss=4.723, pursuer_5/loss=4.955, pursuer_6/loss=4.985, pursuer_7/loss=5.254, rew=459.39]                            


Steps Policy Saved  2220
Epoch #173: test_reward: 532.374937 ± 78.553670, best_reward: 661.870625 ± 126.197348 in #37


Epoch #174: 20001it [01:31, 217.74it/s, env_step=3480000, len=1104, n/ep=1, n/st=800, pursuer_0/loss=4.431, pursuer_1/loss=4.190, pursuer_2/loss=4.766, pursuer_3/loss=4.954, pursuer_4/loss=4.843, pursuer_5/loss=4.957, pursuer_6/loss=5.068, pursuer_7/loss=5.270, rew=659.73]                           


Epoch #174: test_reward: 571.004750 ± 91.125898, best_reward: 661.870625 ± 126.197348 in #37


Epoch #175: 20001it [01:31, 219.61it/s, env_step=3500000, len=1110, n/ep=4, n/st=800, pursuer_0/loss=3.957, pursuer_1/loss=4.002, pursuer_2/loss=4.691, pursuer_3/loss=4.552, pursuer_4/loss=4.436, pursuer_5/loss=4.649, pursuer_6/loss=4.902, pursuer_7/loss=5.044, rew=518.07]                           


Epoch #175: test_reward: 559.373500 ± 84.878378, best_reward: 661.870625 ± 126.197348 in #37


Epoch #176: 20001it [01:31, 217.95it/s, env_step=3520000, len=1192, n/ep=0, n/st=800, pursuer_0/loss=4.123, pursuer_1/loss=4.039, pursuer_2/loss=4.566, pursuer_3/loss=4.585, pursuer_4/loss=4.539, pursuer_5/loss=4.672, pursuer_6/loss=4.937, pursuer_7/loss=5.143, rew=690.96]                           


Epoch #176: test_reward: 558.450125 ± 77.127149, best_reward: 661.870625 ± 126.197348 in #37


Epoch #177: 20001it [01:33, 213.80it/s, env_step=3540000, len=1176, n/ep=0, n/st=800, pursuer_0/loss=4.164, pursuer_1/loss=4.283, pursuer_2/loss=4.894, pursuer_3/loss=4.919, pursuer_4/loss=4.708, pursuer_5/loss=5.021, pursuer_6/loss=5.205, pursuer_7/loss=5.222, rew=777.24]                           


Epoch #177: test_reward: 558.537062 ± 104.678264, best_reward: 661.870625 ± 126.197348 in #37


Epoch #178: 20001it [01:27, 228.84it/s, env_step=3560000, len=1680, n/ep=1, n/st=800, pursuer_0/loss=4.055, pursuer_1/loss=4.109, pursuer_2/loss=4.580, pursuer_3/loss=4.536, pursuer_4/loss=4.514, pursuer_5/loss=4.666, pursuer_6/loss=4.889, pursuer_7/loss=5.009, rew=555.24]                           


Epoch #178: test_reward: 580.364812 ± 94.701128, best_reward: 661.870625 ± 126.197348 in #37


Epoch #179: 20001it [01:27, 229.00it/s, env_step=3580000, len=936, n/ep=2, n/st=800, pursuer_0/loss=4.147, pursuer_1/loss=4.200, pursuer_2/loss=4.580, pursuer_3/loss=4.787, pursuer_4/loss=4.547, pursuer_5/loss=4.896, pursuer_6/loss=4.798, pursuer_7/loss=5.239, rew=468.34]                            


Epoch #179: test_reward: 565.837250 ± 126.987425, best_reward: 661.870625 ± 126.197348 in #37


Epoch #180: 20001it [01:32, 216.35it/s, env_step=3600000, len=1328, n/ep=0, n/st=800, pursuer_0/loss=4.095, pursuer_1/loss=4.317, pursuer_2/loss=4.836, pursuer_3/loss=4.762, pursuer_4/loss=4.750, pursuer_5/loss=5.114, pursuer_6/loss=4.927, pursuer_7/loss=5.369, rew=711.84]                           


Epoch #180: test_reward: 568.585375 ± 119.533448, best_reward: 661.870625 ± 126.197348 in #37


Epoch #181: 20001it [01:33, 212.90it/s, env_step=3620000, len=1232, n/ep=0, n/st=800, pursuer_0/loss=4.055, pursuer_1/loss=4.092, pursuer_2/loss=4.614, pursuer_3/loss=4.843, pursuer_4/loss=4.443, pursuer_5/loss=5.006, pursuer_6/loss=4.834, pursuer_7/loss=5.236, rew=643.84]                           


Epoch #181: test_reward: 572.249250 ± 105.811472, best_reward: 661.870625 ± 126.197348 in #37


Epoch #182: 20001it [01:31, 218.32it/s, env_step=3640000, len=1104, n/ep=1, n/st=800, pursuer_0/loss=3.901, pursuer_1/loss=4.125, pursuer_2/loss=4.429, pursuer_3/loss=4.732, pursuer_4/loss=4.597, pursuer_5/loss=4.950, pursuer_6/loss=4.919, pursuer_7/loss=5.346, rew=514.13]                           


Epoch #182: test_reward: 581.479312 ± 106.430922, best_reward: 661.870625 ± 126.197348 in #37


Epoch #183: 20001it [01:31, 217.85it/s, env_step=3660000, len=1328, n/ep=1, n/st=800, pursuer_0/loss=4.254, pursuer_1/loss=4.282, pursuer_2/loss=4.875, pursuer_3/loss=4.935, pursuer_4/loss=4.783, pursuer_5/loss=5.079, pursuer_6/loss=5.009, pursuer_7/loss=5.401, rew=517.51]                           


Epoch #183: test_reward: 623.809937 ± 82.116323, best_reward: 661.870625 ± 126.197348 in #37


Epoch #184: 20001it [01:30, 220.04it/s, env_step=3680000, len=1040, n/ep=3, n/st=800, pursuer_0/loss=4.178, pursuer_1/loss=4.166, pursuer_2/loss=4.633, pursuer_3/loss=4.813, pursuer_4/loss=4.620, pursuer_5/loss=5.038, pursuer_6/loss=5.029, pursuer_7/loss=5.261, rew=656.61]                           


Epoch #184: test_reward: 563.353375 ± 93.834407, best_reward: 661.870625 ± 126.197348 in #37


Epoch #185: 20001it [01:29, 224.54it/s, env_step=3700000, len=584, n/ep=1, n/st=800, pursuer_0/loss=4.365, pursuer_1/loss=4.184, pursuer_2/loss=4.702, pursuer_3/loss=4.731, pursuer_4/loss=4.729, pursuer_5/loss=5.013, pursuer_6/loss=5.197, pursuer_7/loss=5.282, rew=514.96]                            


Epoch #185: test_reward: 529.294562 ± 108.633814, best_reward: 661.870625 ± 126.197348 in #37


Epoch #186: 20001it [01:28, 225.37it/s, env_step=3720000, len=880, n/ep=0, n/st=800, pursuer_0/loss=4.345, pursuer_1/loss=4.253, pursuer_2/loss=4.631, pursuer_3/loss=4.770, pursuer_4/loss=4.840, pursuer_5/loss=4.899, pursuer_6/loss=4.947, pursuer_7/loss=5.510, rew=447.16]                            


Epoch #186: test_reward: 595.291750 ± 101.849799, best_reward: 661.870625 ± 126.197348 in #37


Epoch #187: 20001it [01:33, 214.01it/s, env_step=3740000, len=1088, n/ep=0, n/st=800, pursuer_0/loss=4.241, pursuer_1/loss=4.148, pursuer_2/loss=4.474, pursuer_3/loss=4.869, pursuer_4/loss=4.803, pursuer_5/loss=4.893, pursuer_6/loss=4.893, pursuer_7/loss=5.212, rew=660.59]                           


Epoch #187: test_reward: 614.626875 ± 105.754508, best_reward: 661.870625 ± 126.197348 in #37


Epoch #188: 20001it [01:31, 219.09it/s, env_step=3760000, len=1176, n/ep=0, n/st=800, pursuer_0/loss=4.365, pursuer_1/loss=4.250, pursuer_2/loss=4.544, pursuer_3/loss=4.965, pursuer_4/loss=4.830, pursuer_5/loss=5.147, pursuer_6/loss=5.131, pursuer_7/loss=5.143, rew=533.45]                           


Epoch #188: test_reward: 559.109000 ± 105.364990, best_reward: 661.870625 ± 126.197348 in #37


Epoch #189: 20001it [01:31, 219.77it/s, env_step=3780000, len=1004, n/ep=2, n/st=800, pursuer_0/loss=4.388, pursuer_1/loss=4.304, pursuer_2/loss=4.605, pursuer_3/loss=4.744, pursuer_4/loss=4.856, pursuer_5/loss=5.199, pursuer_6/loss=5.151, pursuer_7/loss=5.268, rew=522.21]                           


Steps Policy Saved  2450
Epoch #189: test_reward: 538.176500 ± 99.913181, best_reward: 661.870625 ± 126.197348 in #37


Epoch #190: 20001it [01:30, 219.81it/s, env_step=3800000, len=1264, n/ep=0, n/st=800, pursuer_0/loss=4.387, pursuer_1/loss=4.332, pursuer_2/loss=4.747, pursuer_3/loss=4.950, pursuer_4/loss=4.831, pursuer_5/loss=5.048, pursuer_6/loss=5.017, pursuer_7/loss=5.442, rew=524.87]                           


Epoch #190: test_reward: 609.007562 ± 112.608963, best_reward: 661.870625 ± 126.197348 in #37


Epoch #191: 20001it [01:30, 221.34it/s, env_step=3820000, len=1330, n/ep=3, n/st=800, pursuer_0/loss=4.313, pursuer_1/loss=4.277, pursuer_2/loss=4.710, pursuer_3/loss=4.913, pursuer_4/loss=4.943, pursuer_5/loss=5.138, pursuer_6/loss=5.135, pursuer_7/loss=5.102, rew=605.61]                           


Epoch #191: test_reward: 560.879250 ± 82.369729, best_reward: 661.870625 ± 126.197348 in #37


Epoch #192: 20001it [01:27, 228.63it/s, env_step=3840000, len=1160, n/ep=2, n/st=800, pursuer_0/loss=4.300, pursuer_1/loss=4.173, pursuer_2/loss=4.605, pursuer_3/loss=4.933, pursuer_4/loss=4.761, pursuer_5/loss=5.126, pursuer_6/loss=5.071, pursuer_7/loss=5.127, rew=474.62]                           


Epoch #192: test_reward: 604.214000 ± 94.640602, best_reward: 661.870625 ± 126.197348 in #37


Epoch #193: 20001it [01:30, 221.71it/s, env_step=3860000, len=1048, n/ep=0, n/st=800, pursuer_0/loss=4.332, pursuer_1/loss=4.096, pursuer_2/loss=4.708, pursuer_3/loss=4.940, pursuer_4/loss=4.747, pursuer_5/loss=5.149, pursuer_6/loss=4.999, pursuer_7/loss=5.102, rew=675.12]                           


Epoch #193: test_reward: 544.064312 ± 100.739529, best_reward: 661.870625 ± 126.197348 in #37


Epoch #194: 20001it [01:31, 218.59it/s, env_step=3880000, len=920, n/ep=0, n/st=800, pursuer_0/loss=4.231, pursuer_1/loss=4.198, pursuer_2/loss=4.643, pursuer_3/loss=4.890, pursuer_4/loss=4.774, pursuer_5/loss=5.008, pursuer_6/loss=4.958, pursuer_7/loss=5.164, rew=343.90]                            


Epoch #194: test_reward: 563.174062 ± 117.872234, best_reward: 661.870625 ± 126.197348 in #37


Epoch #195: 20001it [01:32, 215.51it/s, env_step=3900000, len=928, n/ep=1, n/st=800, pursuer_0/loss=4.329, pursuer_1/loss=4.154, pursuer_2/loss=4.474, pursuer_3/loss=4.768, pursuer_4/loss=4.713, pursuer_5/loss=4.859, pursuer_6/loss=4.946, pursuer_7/loss=4.956, rew=546.99]                            


Epoch #195: test_reward: 564.429125 ± 107.645042, best_reward: 661.870625 ± 126.197348 in #37


Epoch #196: 20001it [01:29, 222.84it/s, env_step=3920000, len=1016, n/ep=1, n/st=800, pursuer_0/loss=4.411, pursuer_1/loss=3.947, pursuer_2/loss=4.531, pursuer_3/loss=4.794, pursuer_4/loss=4.743, pursuer_5/loss=5.171, pursuer_6/loss=5.217, pursuer_7/loss=5.126, rew=417.65]                           


Epoch #196: test_reward: 575.895687 ± 87.982322, best_reward: 661.870625 ± 126.197348 in #37


Epoch #197: 20001it [01:35, 209.79it/s, env_step=3940000, len=1132, n/ep=0, n/st=800, pursuer_0/loss=4.539, pursuer_1/loss=4.206, pursuer_2/loss=4.797, pursuer_3/loss=5.281, pursuer_4/loss=4.897, pursuer_5/loss=5.231, pursuer_6/loss=5.262, pursuer_7/loss=5.151, rew=649.84]                           


Epoch #197: test_reward: 544.955437 ± 113.811800, best_reward: 661.870625 ± 126.197348 in #37


Epoch #198: 20001it [01:33, 213.27it/s, env_step=3960000, len=1152, n/ep=1, n/st=800, pursuer_0/loss=4.645, pursuer_1/loss=4.560, pursuer_2/loss=4.963, pursuer_3/loss=5.235, pursuer_4/loss=5.031, pursuer_5/loss=5.344, pursuer_6/loss=5.462, pursuer_7/loss=5.173, rew=372.86]                           


Steps Policy Saved  2580
Epoch #198: test_reward: 592.901187 ± 85.715942, best_reward: 661.870625 ± 126.197348 in #37


Epoch #199: 20001it [01:30, 221.22it/s, env_step=3980000, len=1216, n/ep=1, n/st=800, pursuer_0/loss=4.342, pursuer_1/loss=4.074, pursuer_2/loss=4.649, pursuer_3/loss=4.990, pursuer_4/loss=4.741, pursuer_5/loss=4.935, pursuer_6/loss=5.126, pursuer_7/loss=4.989, rew=499.72]                           


Epoch #199: test_reward: 569.913687 ± 115.518046, best_reward: 661.870625 ± 126.197348 in #37


Epoch #200: 20001it [01:27, 227.35it/s, env_step=4000000, len=1208, n/ep=0, n/st=800, pursuer_0/loss=4.406, pursuer_1/loss=4.172, pursuer_2/loss=4.652, pursuer_3/loss=4.911, pursuer_4/loss=4.819, pursuer_5/loss=4.734, pursuer_6/loss=5.093, pursuer_7/loss=5.082, rew=680.76]                           


Epoch #200: test_reward: 571.973062 ± 95.391035, best_reward: 661.870625 ± 126.197348 in #37

{'duration': '26909.99s', 'train_time/model': '10549.96s', 'test_step': 4970104, 'test_episode': 4020, 'test_time': '9290.29s', 'test_speed': '534.98 step/s', 'best_reward': 661.8706249999989, 'best_result': '661.87 ± 126.20', 'train_step': 4000000, 'train_episode': 3099, 'train_time/collector': '7069.74s', 'train_speed': '227.02 step/s'}

(the trained policy can be accessed via policy.policies[agents[0]])


In [3]:
# Persist the first agent's weights, tagging the file with the current value
# of the step counter, then echo that same value to the cell output.
last_step = str(global_step_holder[0])
torch.save(
    policy.policies[agents[0]].state_dict(),
    model_save_path + "_" + last_step + ".pth",
)
print("Steps Policy Saved ", last_step)
            

Steps Policy Saved  2608


In [4]:
def _get_envT():
    """Build one evaluation environment; serves as the factory callable for DummyVectorEnv.

    Every constructor argument below is read from the module-level
    ``SISL_Config`` dict and forwarded to ``TaskPursuitEnv.env``; rendering is
    switched off (``render_mode=None``). The environment is returned wrapped
    in ``PettingZooEnv``.
    """
    # Keys copied one-to-one from SISL_Config into the env constructor.
    # "att_memory" is deliberately not forwarded here.
    config_keys = (
        "max_cycles",
        "x_size",
        "y_size",
        "shared_reward",
        "n_evaders",
        "n_pursuers",
        "obs_range",
        "n_catch",
        "freeze_evaders",
        "tag_reward",
        "catch_reward",
        "urgency_reward",
        "surround",
        "constraint_window",
    )
    env_kwargs = {key: SISL_Config[key] for key in config_keys}

    # render_mode=None disables rendering for this environment.
    task_env = TaskPursuitEnv.env(render_mode=None, **env_kwargs)

    return PettingZooEnv(task_env)


# ======== Evaluation run: roll the policy out on a single test environment ========
policy, optim, agents = _get_agents()
test_env_num = 1

# Environment setup: one factory callable per vectorised environment.
test_envs = DummyVectorEnv([_get_envT] * test_env_num)

# Seed numpy, torch and the environments with the same value.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)
test_envs.seed(seed)

episodes = 1
render = False

# Put every agent's policy into greedy (epsilon = 0) evaluation mode.
# NOTE(review): no checkpoint is loaded in this cell (the load_state_dict call
# was commented out), so unless _get_agents() restores saved weights itself,
# the rollout below uses freshly created networks -- confirm.
for agent in agents:
    agent_policy = policy.policies[agent]
    agent_policy.set_eps(0.00)
    agent_policy.eval()

collector = CustomCollector(policy, test_envs, exploration_noise=False)

# Only pass a render delay when rendering is enabled.
render_delay = 0.02 if render else None
results = collector.collect(n_episode=episodes, render=render_delay)

# Rewards are summed along axis 1 (presumably the per-agent axis -- verify
# against CustomCollector's output layout).
print("FinalRew: ", np.sum(results['rews'], axis=1))
print("Finished: ", results['lens'], " Steps")

FinalRew:  [-200.1125]
Finished:  [4000]  Steps
