# Iniciar ambiente

## Iniciar Local

In [2]:
import os
# Local (non-Colab) run. Presumably read by later cells to skip Colab-only setup — TODO confirm.
isColab = False

## (Sempre) Outras configurações

In [3]:
# # Ambiente da competição
# !pip install --upgrade ceia-soccer-twos > /dev/null 2>&1
# # a versão do ray compatível com a implementação dos agentes disponibilizada é a 1.4.0
# !pip install 'aioredis==1.3.1' > /dev/null 2>&1
# !pip install 'aiohttp==3.7.4' > /dev/null 2>&1
# !pip install 'ray==1.4.0' > /dev/null 2>&1
# !pip install 'ray[rllib]==1.4.0' > /dev/null 2>&1
# !pip install 'ray[tune]==1.4.0' > /dev/null 2>&1
# !pip install torch > /dev/null 2>&1
# !pip install lz4 > /dev/null 2>&1
# !pip install GPUtil > /dev/null 2>&1

# # Dependências necessárias para gravar os vídeos
# !apt-get install -y xvfb x11-utils > /dev/null 2>&1
# !pip install 'pyvirtualdisplay==0.2.*' > /dev/null 2>&1
# !pip install tensorboard > /dev/null 2>&1


In [4]:
!pip show ray

Name: ray
Version: 1.4.0
Summary: Ray provides a simple, universal API for building distributed applications.
Home-page: https://github.com/ray-project/ray
Author: Ray Team
Author-email: ray-dev@googlegroups.com
License: Apache 2.0
Location: /home/bruno/anaconda3/envs/soccer-twos/lib/python3.8/site-packages
Requires: redis, jsonschema, aiohttp, grpcio, filelock, colorama, pyyaml, gpustat, aioredis, numpy, pydantic, prometheus-client, msgpack, py-spy, requests, protobuf, opencensus, click, aiohttp-cors
Required-by: 


# Soccer Twos

Como tarefa bônus, experimente com os algoritmos aprendidos no ambiente `soccer_twos`, que será utilizado na competição final deste curso*. Para facilitar, utilize a variação `team_vs_policy` como no laboratório anterior.

<img src="https://raw.githubusercontent.com/bryanoliveira/soccer-twos-env/master/images/screenshot.png" height="400">

> Visualização do ambiente

Este ambiente consiste em um jogo de futebol de carros 2x2, ou seja, o objetivo é marcar um gol no adversário o mais rápido possível. Na variação `team_vs_policy`, seu agente controla um jogador do time azul e joga contra um time aleatório. Mais informações sobre o ambiente podem ser encontradas [no repositório](https://github.com/bryanoliveira/soccer-twos-env) e [na documentação do Unity ml-agents](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Examples.md#soccer-twos).


**Sua tarefa é treinar um agente com a interface do Ray apresentada, experimentando com diferentes algoritmos e hiperparâmetros.**


<br>

*A variação utilizada na competição será a `multiagent_player`, mas agentes treinados para `team_vs_policy` podem ser facilmente adaptados. Na seção "Exportando seu agente treinado" o agente "MyDqnSoccerAgent" faz exatamente isso.

## Imports

In [5]:
import gym

import ray
from ray import tune
from ray.tune import Analysis
# from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.env import BaseEnv
from ray.rllib.evaluation.episode import MultiAgentEpisode
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.policy import Policy
from ray.rllib.agents.callbacks import DefaultCallbacks
# from ray.rllib.utils.typing import PolicyID
# from ray.tune.registry import get_trainable_cls
# from ray.rllib.policy.policy import PolicySpec

import numpy as np
from typing import Any, Dict, List, Union, Optional
from collections import deque
# import pickle
from pprint import pprint

import soccer_twos
from soccer_twos import EnvType
from soccer_twos.side_channels import EnvConfigurationChannel

import shutil

## Wrapper

In [6]:
import gym
from typing import Any, Dict, List, Union

from ray.rllib.env.multi_agent_env import MultiAgentEnv
import numpy as np
from collections import deque

# Step budget used to scale per-step penalties (see AFTER_BALL_STEP_PENALTY).
MAX_STEPS = 1_000
# Nominal length of a full match, in environment steps.
MATCH_STEPS = 5_000


def get_scalar_projection(x, y):
    """Return the scalar projection of ``x`` onto ``y``, i.e. ``dot(x, y) / ||y||``.

    An assertion fails when ``y`` is (numerically) the zero vector, because
    its direction is then undefined.
    """
    y_norm = np.linalg.norm(y)
    assert y_norm > 0.000001
    return np.dot(x, y) / y_norm


# The following values were obtained experimentally in preliminary runs.
# Several other quantities (goal positions, normalisation factors, ...) are
# derived from them.
min_ball_position_x, max_ball_position_x = (
    -15.563264846801758, 15.682827949523926)
min_ball_position_y, max_ball_position_y = -7.08929967880249, 7.223850250244141
min_player_position_x, max_player_position_x = (
    -17.26804542541504, 17.16301727294922)
min_player_position_y, max_player_position_y = (
    -7.399587631225586, 7.406457424163818)
# Signed range of the averaged ball-to-goal speed: the lower bound is negative.
# (A stray second minus sign used to turn the minimum into a positive number.)
min_ball_to_goal_avg_velocity, max_ball_to_goal_avg_velocity = (
    -23.366606239568615, 23.749571761530724)

max_ball_abs_velocity = 78.25721740722656
# Sentinel starting values for running maxima; no live code visible in this
# cell updates them.
max_goals_one_team = -9999999
max_goals_one_match = -9999999
max_steps = -999999

max_diff_reward = -np.inf

# Inferred: largest magnitude of the averaged ball-to-goal speed, used to
# normalise the speed-based reward term.
max_ball_abs_avg_velocity = max(
    abs(min_ball_to_goal_avg_velocity), abs(max_ball_to_goal_avg_velocity))


# Weight of the ball-to-goal speed term in the shaped reward.
SPEED_IMPORTANCE = 1.0 / 14.0
# When True, the speed term is clipped to [-SPEED_IMPORTANCE, SPEED_IMPORTANCE].
CLIP_SPEED_REWARD_BY_SPEED_IMPORTANCE = True

# Magnitude of the per-step "after the ball" penalty; the currently active
# reward does not apply it.
AFTER_BALL_STEP_PENALTY = 1 / MAX_STEPS  # 0.001

# NOTE: this hyperparameter must not be changed without re-measuring
# min_ball_to_goal_avg_velocity and max_ball_to_goal_avg_velocity.
AVG_SPEED_TIMESTEPS_WINDOW = 1


def is_after_the_ball(player_id: int, player_pos: np.array, ball_pos: np.array):
    """Tell whether a player is past the ball along the x axis.

    Players 0 and 1 count as "after" the ball when their x coordinate is
    greater than the ball's; players 2 and 3 when it is smaller.

    Returns:
        The comparison result for player ids 0-3, ``None`` for any other id.
    """
    if player_id in (0, 1):
        return player_pos[0] > ball_pos[0]
    if player_id in (2, 3):
        return player_pos[0] < ball_pos[0]
    return None


def get_center_of_goal_pos(player_id):
    """Return the ``[x, 0.0]`` goal-centre position used for ``player_id``.

    Players 0 and 1 get the point at ``max_ball_position_x``; players 2 and 3
    get the point at ``min_ball_position_x``. Any other id yields ``None``.
    """
    if player_id in (0, 1):
        goal_x = max_ball_position_x
    elif player_id in (2, 3):
        goal_x = min_ball_position_x
    else:
        return None
    return np.array([goal_x, 0.0])


def calculate_ball_to_goal_scalar_velocity(player_id: int, info: Dict, x_axis_only=True):
    """Signed speed of the ball towards the goal associated with ``player_id``.

    Args:
        player_id: Agent id (0-3).
        info: Per-agent info dict; ``info["ball_info"]`` must provide
            ``"velocity"`` and, for the projection mode, ``"position"``.
        x_axis_only: When True (default) only the x component of the ball
            velocity is used: returned as-is for players 0/1 and negated for
            players 2/3. Otherwise the velocity is projected onto the
            direction from the ball to the centre of the goal.

    Returns:
        The scalar speed; positive means the ball moves towards that goal.
    """
    velocity = info["ball_info"]["velocity"]
    if x_axis_only:
        if player_id in (0, 1):
            return velocity[0]
        if player_id in (2, 3):
            return -velocity[0]

    # Projection mode (also reached for ids outside 0-3 in x-axis mode).
    to_goal_center = get_center_of_goal_pos(player_id) - info["ball_info"]["position"]
    return get_scalar_projection(velocity, to_goal_center)

# print('ball_velocity_to_center_of_goal', calculate_ball_to_goal_scalar_velocity(0, { "ball_info": { "position": np.array([3.0, 2.0]), "velocity": np.array([0.0, 0.0]) }}))


def calculate_distance(pt1: np.ndarray, pt2: np.ndarray):
    """Euclidean distance between two 2-D points.

    Both arguments must be arrays of shape ``(2,)``; otherwise an assertion
    fails.
    """
    for point in (pt1, pt2):
        assert point.shape == (2,)
    return np.linalg.norm(pt1 - pt2)


class CustomRewardWrapper(gym.core.Wrapper, MultiAgentEnv):
    """Reward-shaping wrapper for the four-agent soccer environment.

    On top of the wrapped environment's reward, an agent receives a bonus
    proportional to the ball-to-goal speed (see
    ``calculate_ball_to_goal_scalar_velocity``), but only while that agent is
    the one credited with the last ball touch. The wrapper also keeps a
    per-episode scoreboard and publishes per-agent statistics under
    ``info[agent_id]["ep_metrics"]``.

    All per-episode state is created in ``reset``, so ``reset`` must be called
    before the first ``step``.
    """

    def step(self, action: Union[Dict[int, List[Any]], List[Any]]):
        """Step the wrapped env, then shape rewards and enrich ``info``.

        Args:
            action: Dict of actions keyed by agent id. Any other container
                raises ``NotImplementedError``.

        Returns:
            ``(obs, new_rewards, done, info)`` where ``new_rewards[agent_id]``
            is the environment reward plus the speed bonus and
            ``info[agent_id]["ep_metrics"]`` holds the episode statistics.
        """
        obs, rewards, done, info = super().step(action)

        # Ball state is read from agent 0's info; player positions from each
        # agent's own entry.
        ball_pos = info[0]["ball_info"]["position"]
        ball_velocity = info[0]["ball_info"]["velocity"]
        player_positions = [info[agent_id]["player_info"]["position"]
                            for agent_id in range(4)]

        if self._was_ball_effective_touched(self.prev_ball_velocity, ball_velocity):
            # Called for its side effect: it refreshes self.last_ball_toucher.
            self._get_ball_toucher(ball_velocity, ball_pos, *player_positions)

        if not isinstance(action, dict):
            raise NotImplementedError('Necessário implementar!')

        # Evaluate every agent exactly ONCE: _calculate_reward updates the
        # scoreboard and the speed window, so calling it a second time for the
        # same step would count each goal twice.
        split_rewards = {
            agent_id: self._calculate_reward(
                rewards[agent_id], agent_id, agent_info, splitted_returns=True)
            for agent_id, agent_info in info.items()
        }
        new_rewards = {
            agent_id: env_reward + speed_reward
            for agent_id, (env_reward, speed_reward) in split_rewards.items()
        }

        team_0_goals = self.scoreboard["team_0"]
        team_1_goals = self.scoreboard["team_1"]
        total_goals = team_0_goals + team_1_goals
        info = {
            agent_id: {
                **agent_info,
                "ep_metrics": {
                    "total_timesteps": self.n_step + 1,
                    "total_goals": total_goals,
                    # Agents 0 and 1 are reported as team_0, agents 2 and 3 as team_1.
                    "goals_opponent": team_1_goals if agent_id in range(2) else team_0_goals,
                    "goals_in_favor": team_0_goals if agent_id in range(2) else team_1_goals,
                    "team_0_goals": team_0_goals,
                    "team_1_goals": team_1_goals,
                    "episode_ended": done["__all__"],
                    "have_goals": total_goals > 0,
                    "env_reward": split_rewards[agent_id][0],
                    "ball_to_goal_speed_reward": split_rewards[agent_id][1],
                }
            } for agent_id, agent_info in info.items()
        }

        self.n_step += 1
        self.prev_ball_velocity = ball_velocity.copy()

        return obs, new_rewards, done, info

    def reset(self, **kwargs):
        """Reset the wrapped env and re-create all per-episode bookkeeping."""
        obs = super().reset(**kwargs)
        self.n_step = 0
        self.last_ball_speed_mean_per_player = {0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0}
        # One sliding window of recent ball-to-goal speeds per player.
        self.ball_speed_deque_per_player = {
            agent_id: deque(maxlen=AVG_SPEED_TIMESTEPS_WINDOW)
            for agent_id in range(4)
        }
        self.scoreboard = {"team_0": 0, "team_1": 0}
        # Not read by any live code in this class; kept so existing attribute
        # accesses keep working.
        self.await_press = False
        self.prev_ball_velocity = np.array([0.0, 0.0])
        # -1 means "nobody is currently credited with the last touch".
        self.last_ball_toucher = -1
        return obs

    def _was_ball_effective_touched(self, prev_ball_velocity: np.ndarray, curr_ball_velocity: np.ndarray):
        """Tell whether the ball velocity changed enough to count as a touch
        (by a player or a wall).

        Side effect: when the current ball speed is below 1.0 the last-toucher
        credit is cleared (``self.last_ball_toucher = -1``).

        Args:
            prev_ball_velocity (np.ndarray): Ball velocity at the previous step, shape (2,).
            curr_ball_velocity (np.ndarray): Ball velocity at the current step, shape (2,).

        Returns:
            True when the velocity change exceeds 20% of the previous speed;
            if the ball was (numerically) at rest, True when it is now moving
            faster than before.
        """
        assert prev_ball_velocity.shape == (2,) and \
            curr_ball_velocity.shape == (2,)
        relative_change_threshold = 0.2  # 20%

        curr_speed = np.linalg.norm(curr_ball_velocity)
        prev_speed = np.linalg.norm(prev_ball_velocity)

        if curr_speed < 1.0:
            self.last_ball_toucher = -1

        if prev_speed > 0.0000001:
            velocity_change = np.linalg.norm(curr_ball_velocity - prev_ball_velocity)
            return velocity_change / prev_speed > relative_change_threshold
        return curr_speed > prev_speed

    def _get_ball_toucher(self,
                          ball_velocity: np.ndarray,
                          ball_position: np.ndarray,
                          player_0_pos: np.ndarray,
                          player_1_pos: np.ndarray,
                          player_2_pos: np.ndarray,
                          player_3_pos: np.ndarray):
        """Credit the ball touch to the nearest player, unless a wall is nearer.

        If the ball is moving, the distances from the ball to the four players
        and to the four walls (taken from the measured ball-position bounds)
        are compared. ``self.last_ball_toucher`` is updated only when the
        nearest object is a player; otherwise it is left unchanged.

        Returns:
            The (possibly updated) ``self.last_ball_toucher``.
        """
        assert ball_position.shape == (2,) and \
            player_0_pos.shape == (2,) and \
            player_1_pos.shape == (2,) and \
            player_2_pos.shape == (2,) and \
            player_3_pos.shape == (2,)

        if np.linalg.norm(ball_velocity) > 0.000001:
            player_distances = [
                calculate_distance(ball_position, player_pos)
                for player_pos in (player_0_pos, player_1_pos,
                                   player_2_pos, player_3_pos)
            ]
            wall_distances = [
                np.abs(ball_position[1] - max_ball_position_y),  # top wall
                np.abs(ball_position[1] - min_ball_position_y),  # bottom wall
                np.abs(ball_position[0] - min_ball_position_x),  # left wall
                np.abs(ball_position[0] - max_ball_position_x),  # right wall
            ]
            # Indices 0-3 are players, 4-7 are walls.
            nearest = np.argmin(np.array(player_distances + wall_distances))
            if nearest < 4:
                self.last_ball_toucher = nearest

        return self.last_ball_toucher

    def _calculate_reward(self, reward: float, player_id: int, info: Dict, splitted_returns=False) -> Union[float, tuple]:
        """Compute the shaped reward of one agent for the current step.

        NOTE: this method has side effects (scoreboard and speed-window
        updates) and must be called once per agent per step.

        Args:
            reward: Reward returned by the wrapped environment for this agent.
            player_id: Agent id (0-3).
            info: This agent's info dict from the wrapped environment.
            splitted_returns: When True return the two components separately.

        Returns:
            ``env_reward + speed_reward``, or the tuple
            ``(env_reward, speed_reward)`` when ``splitted_returns`` is True.
        """
        if reward != 0.0:
            self._update_scoreboard(player_id, reward)

        self._update_avg_ball_speed_to_goal(
            player_id, calculate_ball_to_goal_scalar_velocity(player_id, info))

        env_reward = reward

        # Normalise the averaged speed by its largest measured magnitude.
        ball_to_goal_speed_reward = SPEED_IMPORTANCE * \
            self.last_ball_speed_mean_per_player[player_id] / max_ball_abs_avg_velocity
        if CLIP_SPEED_REWARD_BY_SPEED_IMPORTANCE:
            ball_to_goal_speed_reward = np.clip(
                ball_to_goal_speed_reward, -SPEED_IMPORTANCE, SPEED_IMPORTANCE)
        # Only the player credited with the last touch gets the speed term.
        ball_to_goal_speed_reward = (
            player_id == self.last_ball_toucher) * ball_to_goal_speed_reward

        if splitted_returns:
            return (env_reward, ball_to_goal_speed_reward)
        return env_reward + ball_to_goal_speed_reward

    def _update_avg_ball_speed_to_goal(self, player_id: int, ball_speed: float):
        """Push ``ball_speed`` into the player's window and store the window mean."""
        assert player_id in [0, 1, 2, 3]

        speed_window = self.ball_speed_deque_per_player[player_id]
        speed_window.append(ball_speed)
        self.last_ball_speed_mean_per_player[player_id] = np.mean(speed_window)

    def _update_scoreboard(self, player_id, reward):
        """Register a goal from the environment reward.

        Only a reward of exactly -1.0 received by player 0 (goal for team_1)
        or player 2 (goal for team_0) is counted; every other
        ``(player_id, reward)`` pair is ignored.
        """
        if player_id == 0 and reward == -1.0:
            self.scoreboard["team_1"] += 1
        elif player_id == 2 and reward == -1.0:
            self.scoreboard["team_0"] += 1


## Utils

In [7]:
class RLLibWrapper(gym.core.Wrapper, MultiAgentEnv):
    """Pass-through gym wrapper that adds no behaviour of its own.

    It exists only so that the wrapped environment is an instance of RLlib's
    ``MultiAgentEnv``.
    """


def create_rllib_env(env_config: Optional[dict] = None):
    """
    Creates a RLLib environment and prepares it to be instantiated by Ray workers.

    Args:
        env_config: configuration for the environment; ``None`` (default) means
            an empty configuration. All keys are forwarded to
            ``soccer_twos.make``. You may specify the following keys:
            - variation: one of soccer_twos.EnvType. Defaults to EnvType.multiagent_player.
            - opponent_policy: a Callable for your agent to train against. Defaults to a random policy.
            - multiagent: set to False to get the raw environment instead of
              the ``RLLibWrapper``-wrapped one.

    Returns:
        The created environment, wrapped in ``RLLibWrapper`` unless
        ``multiagent`` is explicitly falsy.

    Note:
        When ``env_config`` carries ``worker_index``/``vector_index``
        attributes (as a per-worker env context does), a ``worker_id`` derived
        from them is written into ``env_config`` in place before the
        environment is created.
    """
    # A fresh dict per call: a shared ``{}`` default would leak state between calls.
    if env_config is None:
        env_config = {}

    if hasattr(env_config, "worker_index"):
        env_config["worker_id"] = (
            env_config.worker_index * env_config.get("num_envs_per_worker", 1)
            + env_config.vector_index
        )
    env = soccer_twos.make(**env_config)
    if "multiagent" in env_config and not env_config["multiagent"]:
        # is multiagent by default, is only disabled if explicitly set to False
        return env
    return RLLibWrapper(env)


def create_custom_env(env_config: Optional[dict] = None):
    """Create the RLlib soccer environment and wrap it in ``CustomRewardWrapper``.

    Args:
        env_config: configuration forwarded to ``create_rllib_env``; ``None``
            (default) means an empty configuration.

    Returns:
        The environment returned by ``create_rllib_env`` wrapped in
        ``CustomRewardWrapper``.
    """
    # Build a fresh dict per call instead of sharing one mutable default.
    base_env = create_rllib_env({} if env_config is None else env_config)
    return CustomRewardWrapper(base_env)

## Callback

In [8]:
# Win-rate threshold referenced by the commented-out self-play logic in this
# cell. The identifier's spelling is kept because other code may use it.
WIN_RATE_THEWSHOLD = 0.2
# Environment-configuration side channel; not used by the live code visible in this cell.
env_channel = EnvConfigurationChannel()

class SelfPlayCallback(DefaultCallbacks):
    def __init__(self):
        """Create the callback; it adds no state beyond what the base class sets up."""
        super().__init__()

    def on_episode_step(self,
                        *,
                        worker: "RolloutWorker",
                        base_env: BaseEnv,
                        episode: MultiAgentEpisode,
                        env_index: Optional[int] = None,
                        **kwargs) -> None:
        """Accumulate the per-agent reward components of the current episode.

        For each of the agents 0-3, the ``env_reward`` and
        ``ball_to_goal_speed_reward`` found in that agent's latest
        ``info["ep_metrics"]`` are added to running totals kept in
        ``episode.user_data[agent_id]``. ``episode.custom_metrics`` is then
        replaced with agent 0's totals, so it always reflects the sums
        accumulated so far in the episode.

        Args:
            worker: Unused.
            base_env: Unused.
            episode: Episode whose ``user_data`` / ``custom_metrics`` are updated.
            env_index: Unused.
            **kwargs: Ignored.
        """
        agent_ids = (0, 1, 2, 3)
        # (running-total key, key of the per-step value inside "ep_metrics")
        tracked_components = (
            ("total_env_reward", "env_reward"),
            ("total_ball_to_goal_speed_reward", "ball_to_goal_speed_reward"),
        )

        # First step of the episode: start every running total at zero.
        if not episode.user_data:
            episode.user_data = {
                agent_id: {total_key: 0.0 for total_key, _ in tracked_components}
                for agent_id in agent_ids
            }

        # Build a new mapping (keeping any unrelated keys) with updated totals.
        updated_user_data = dict(episode.user_data)
        for agent_id in agent_ids:
            step_metrics = episode.last_info_for(agent_id)["ep_metrics"]
            running_totals = episode.user_data[agent_id]
            updated_user_data[agent_id] = {
                total_key: running_totals[total_key] + step_metrics[step_key]
                for total_key, step_key in tracked_components
            }
        episode.user_data = updated_user_data

        # Only agent 0's totals are reported as custom metrics.
        episode.custom_metrics = {
            "agent_0_total_env_reward": episode.user_data[0]["total_env_reward"],
            "agent_0_total_ball_to_goal_speed_reward": episode.user_data[0]["total_ball_to_goal_speed_reward"],
        }

    # def on_episode_end(self,
    #                    *,
    #                    worker: "RolloutWorker",
    #                    base_env: BaseEnv,
    #                    policies: Dict[PolicyID, Policy],
    #                    episode: MultiAgentEpisode,
    #                    env_index: Optional[int] = None,
    #                    **kwargs) -> None:
    #     total_timesteps = episode.last_info_for(
    #         0)["ep_metrics"]["total_timesteps"]
    #     total_goals = float(episode.last_info_for(0)[
    #                         "ep_metrics"]["total_goals"])
    #     estimated_goals_in_match = total_goals * MATCH_STEPS / \
    #         float(total_timesteps) if total_goals > 0 else 0.0
    #     timesteps_to_goal = float(
    #         total_timesteps) if total_goals > 0 else 9999.0

    #     episode.user_data = {
    #         **episode.user_data,
    #         0: {
    #             "total_env_reward": episode.user_data[0]["total_env_reward"] + episode.last_info_for(0)["ep_metrics"]["env_reward"],
    #             "total_ball_to_goal_speed_reward": episode.user_data[0]["total_ball_to_goal_speed_reward"] + episode.last_info_for(0)["ep_metrics"]["ball_to_goal_speed_reward"],
    #             # "total_agent_position_to_ball_reward": episode.user_data[0]["total_agent_position_to_ball_reward"] + episode.last_info_for(0)["ep_metrics"]["agent_position_to_ball_reward"],
    #         },
    #         1: {
    #             "total_env_reward": episode.user_data[1]["total_env_reward"] + episode.last_info_for(1)["ep_metrics"]["env_reward"],
    #             "total_ball_to_goal_speed_reward": episode.user_data[1]["total_ball_to_goal_speed_reward"] + episode.last_info_for(1)["ep_metrics"]["ball_to_goal_speed_reward"],
    #             # "total_agent_position_to_ball_reward": episode.user_data[1]["total_agent_position_to_ball_reward"] + episode.last_info_for(1)["ep_metrics"]["agent_position_to_ball_reward"],
    #         },
    #         2: {
    #             "total_env_reward": episode.user_data[2]["total_env_reward"] + episode.last_info_for(2)["ep_metrics"]["env_reward"],
    #             "total_ball_to_goal_speed_reward": episode.user_data[2]["total_ball_to_goal_speed_reward"] + episode.last_info_for(2)["ep_metrics"]["ball_to_goal_speed_reward"],
    #             # "total_agent_position_to_ball_reward": episode.user_data[2]["total_agent_position_to_ball_reward"] + episode.last_info_for(2)["ep_metrics"]["agent_position_to_ball_reward"],
    #         },
    #         3: {
    #             "total_env_reward": episode.user_data[3]["total_env_reward"] + episode.last_info_for(3)["ep_metrics"]["env_reward"],
    #             "total_ball_to_goal_speed_reward": episode.user_data[3]["total_ball_to_goal_speed_reward"] + episode.last_info_for(3)["ep_metrics"]["ball_to_goal_speed_reward"],
    #             # "total_agent_position_to_ball_reward": episode.user_data[3]["total_agent_position_to_ball_reward"] + episode.last_info_for(3)["ep_metrics"]["agent_position_to_ball_reward"],
    #         }
    #     }

    #     episode.custom_metrics = {
    #         # "total_timesteps": total_timesteps,
    #         # "timesteps_to_goal": timesteps_to_goal,
    #         # "estimated_goals_in_match": estimated_goals_in_match,
    #         # "team_0_goals": episode.last_info_for(0)["ep_metrics"]["team_0_goals"],
    #         # "team_1_goals": episode.last_info_for(0)["ep_metrics"]["team_1_goals"],
    #         # "have_goals": episode.last_info_for(0)["ep_metrics"]["have_goals"],
    #         "agent_0_total_env_reward": episode.user_data[0]["total_env_reward"],
    #         "agent_0_total_ball_to_goal_speed_reward": episode.user_data[0]["total_ball_to_goal_speed_reward"],
    #         # "agent_0_total_agent_position_to_ball_reward": episode.user_data[0]["total_agent_position_to_ball_reward"],
    #     }

    # def __init__(self):
    #     super().__init__()
    #     # 0=RandomPolicy, 1=1st main policy snapshot,
    #     # 2=2nd main policy snapshot, etc..
    #     self.current_opponent = 0

    # def on_train_result(self, *, trainer, result, **kwargs):
    #     # Get the win rate for the train batch.
    #     # Note that normally, one should set up a proper evaluation config,
    #     # such that evaluation always happens on the already updated policy,
    #     # instead of on the already used train_batch.
    #     # print("result", result)
    #     # print("result[hist_stats]", result["hist_stats"])
    #     main_rew = result["hist_stats"].pop("policy_main_reward")
    #     opponent_rew = result["hist_stats"].pop("policy_random_reward")
    #     # opponent_rew = list(result["hist_stats"].values())[0]
    #     # print('len(main_rew)', len(main_rew))
    #     # print("len(opponent_rew)", len(opponent_rew))
    #     assert len(main_rew) == len(opponent_rew)
    #     won = 0
    #     for r_main, r_opponent in zip(main_rew, opponent_rew):
    #         if r_main > r_opponent:
    #             won += 1
    #     win_rate = won / len(main_rew)
    #     result["win_rate"] = win_rate
    #     print(f"Iter={trainer.iteration} win-rate={win_rate} -> ", end="")
    #     # If win rate is good -> Snapshot current policy and play against
    #     # it next, keeping the snapshot fixed and only improving the "main"
    #     # policy.
    #     if win_rate > WIN_RATE_THEWSHOLD:
    #         self.current_opponent += 1
    #         new_pol_id = f"main_v{self.current_opponent}"
    #         print(f"adding new opponent to the mix ({new_pol_id}).")

    #         # Re-define the mapping function, such that "main" is forced
    #         # to play against any of the previously played policies
    #         # (excluding "random").
    #         alternator = Alternator()
    #         def new_policy_mapping_fn(agent_id, **kwargs):
    #             # agent_id = [0|1] -> policy depends on episode ID
    #             # This way, we make sure that both policies sometimes play agent0
    #             # (start player) and sometimes agent1 (player to move 2nd).
    #             selected_pol = "main" if alternator.step_value() == agent_id \
    #                 else "main_v{}".format(np.random.choice(
    #                     list(range(1, self.current_opponent + 1))))
    #             print(f'policy_mapping_fn selected_pol: {selected_pol}\nself.current_opponent: {self.current_opponent}')
    #             return selected_pol

    #         # new_policy = trainer.add_policy(
    #         #     policy_id=new_pol_id,
    #         #     policy_cls=type(trainer.get_policy("main")),
    #         #     policy_mapping_fn=policy_mapping_fn,
    #         # )

    #         trainer.workers.local_worker().policy_config["multiagent"]["policy_mapping_fn"] = new_policy_mapping_fn
    #         trainer.workers.local_worker().policy_mapping_fn = new_policy_mapping_fn

    #         trainer.workers.local_worker().policy_map[new_pol_id] = trainer.get_policy("main")

    #         # for r in trainer.workers.remote_workers():
    #         #     # r.policy_config["multiagent"]["policy_mapping_fn"] = policy_mapping_fn
    #         #     # r.policy_mapping_fn = policy_mapping_fn
    #         #     r.policy_map[new_pol_id] = trainer.get_policy("main")
    #         #     # r.policy_map[new_pol_id].set_state(main_state)

    #         # Set the weights of the new policy to the main policy.
    #         # We'll keep training the main policy, whereas `new_pol_id` will
    #         # remain fixed.
    #         main_state = trainer.get_policy("main").get_state()
    #         # new_policy.set_state(main_state)
    #         trainer.workers.local_worker().policy_map[new_pol_id].set_state(main_state)
    #         # We need to sync the just copied local weights (from main policy)
    #         # to all the remote workers as well.
    #         # trainer.workers.sync_weights()
    #     else:
    #         print("not good enough; will keep learning ...")

    #     # +2 = main + random
    #     result["league_size"] = self.current_opponent + 2


## Stop

In [9]:
# Stopping criteria for the training run (passed as `stop=` to `tune.run`).
# Only the total-timesteps budget is active; the other criteria are kept
# commented out as ready-to-enable alternatives.
stop = dict(
    timesteps_total=15_000_000,  # 15M
    # time_total_s=14400,  # 4h
    # episodes_total=10,
    # training_iteration=1,
)


## Config


In [10]:
# Number of environment copies each rollout worker steps; used both in the
# env config and in the trainer config below.
NUM_ENVS_PER_WORKER = 4
# Name under which the environment creator is registered with Tune.
ENVIRONMENT_ID = "Soccer"

# Arguments forwarded to `create_custom_env` (also embedded in the trainer's
# "env_config" further down).
ENVIRONMENT_CONFIG = {
    "num_envs_per_worker": NUM_ENVS_PER_WORKER,
    "variation": EnvType.multiagent_player,
    # "env_channel": env_channel,
}

# Spin up a throwaway environment only to read the observation/action spaces
# needed by the policy spec. The `finally` guarantees the environment is
# closed even if reading a space raises, so it is never left running.
temp_env = create_custom_env(ENVIRONMENT_CONFIG)
try:
    obs_space = temp_env.observation_space
    act_space = temp_env.action_space
finally:
    temp_env.close()

# alternator = Alternator()
# def policy_mapping_fn(agent_id, **kwargs):
#     print('chamando policy_mapping_fn original')
#     # agent_id = [0|1] -> policy depends on episode ID
#     # This way, we make sure that both policies sometimes play agent0
#     # (start player) and sometimes agent1 (player to move 2nd).
#     return "main" if alternator.step_value() == agent_id else "random"

# GPU budgeting: the driver takes 1 / (num_workers + 1) of a GPU and whatever
# is left of `gpu_count` is split evenly among the rollout workers. With the
# values below every process ends up with 0.25 GPU.
gpu_count = 1
num_workers = 3
num_gpus_for_driver = 1 / (num_workers + 1)  # Driver GPU
# Guard against division by zero when running without rollout workers.
num_gpus_per_worker = (
    (gpu_count - num_gpus_for_driver) / num_workers if num_workers > 0 else 0
)

config = {
    # system settings
    "num_gpus": num_gpus_for_driver,
    "num_workers": num_workers,
    "num_envs_per_worker": NUM_ENVS_PER_WORKER,
    # CPU budget: 2 (driver) + 3 workers * 2 = 8, the same number of CPUs
    # that `run_experiment` hands to `ray.init`.
    "num_cpus_for_driver": 2,
    "num_cpus_per_worker": 2,
    "num_gpus_per_worker": num_gpus_per_worker,
    "log_level": "INFO",
    "framework": "torch",
    # RL setup
    "multiagent": {
        # A single policy, "main"; `None` as policy class lets the trainer
        # pick its default policy for the chosen algorithm.
        "policies": {
            "main": (None, obs_space, act_space, {}),
        },
        # Every agent id is mapped to the same shared "main" policy.
        "policy_mapping_fn": lambda _: "main",
        "policies_to_train": ["main"],
    },
    "env": ENVIRONMENT_ID,
    "env_config": {
        **ENVIRONMENT_CONFIG,
        # "render": True,
        # "time_scale": 1,
    },
    "callbacks": SelfPlayCallback,
}

[INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0


INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0


[INFO] Connected new brain: SoccerTwos?team=1


INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1


[INFO] Connected new brain: SoccerTwos?team=0


INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


## Run experiment

### Train PPO SelfPlay

In [11]:
def run_experiment(
    experiment_name="PPO_multiagent_rewards_1.4",
    local_dir="../../ray_results",
    num_cpus=8,
    checkpoint_freq=50,
    restore=None,
):
    """Train PPO on the registered soccer environment and report the best result.

    Initialises Ray, registers `create_custom_env` under `ENVIRONMENT_ID`,
    launches a single Tune trial using the notebook-level `config` and `stop`
    dictionaries, and then looks up the best trial / checkpoint according to
    `episode_reward_mean`.

    Args:
        experiment_name: Name of the Tune experiment (sub-directory of
            `local_dir`).
        local_dir: Directory where Tune stores results and checkpoints.
        num_cpus: Number of CPUs passed to `ray.init`.
        checkpoint_freq: Save a checkpoint every this many training
            iterations; a final checkpoint is always written at the end.
        restore: Optional path of a checkpoint to restore from, e.g.
            "../../ray_results/PPO_selfplay_1/PPO_Soccer_ID/checkpoint_00X/checkpoint-X".
            `None` starts training from scratch.

    Returns:
        Tuple `(analysis, best_trial, best_checkpoint)`. `best_trial` and
        `best_checkpoint` are `None` when Tune cannot determine a best trial
        for the metric.
    """
    metric = "episode_reward_mean"

    # `ignore_reinit_error` lets the cell be re-run in the same kernel.
    ray.init(num_cpus=num_cpus, include_dashboard=False, ignore_reinit_error=True)

    tune.registry.register_env(ENVIRONMENT_ID, create_custom_env)

    analysis = tune.run(
        "PPO",
        num_samples=1,
        name=experiment_name,
        config=config,
        stop=stop,
        checkpoint_freq=checkpoint_freq,
        checkpoint_at_end=True,
        local_dir=local_dir,
        restore=restore,
        # resume=True
    )

    # Best trial ranked by the highest mean episode reward.
    best_trial = analysis.get_best_trial(metric, mode="max")
    print(best_trial)

    # Best checkpoint of that trial under the same metric. Skip the lookup
    # when no best trial was found, so the finished `analysis` is still
    # returned instead of being lost to an exception.
    if best_trial is None:
        best_checkpoint = None
    else:
        best_checkpoint = analysis.get_best_checkpoint(
            trial=best_trial, metric=metric, mode="max"
        )
    print(best_checkpoint)
    print("Done training")
    return analysis, best_trial, best_checkpoint

run_experiment()


Trial name,status,loc
PPO_Soccer_99818_00000,PENDING,


[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected new

[2m[36m(pid=78458)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78454)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78458)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78458)[0m [INFO] Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78454)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78454)[0m [INFO] Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78459)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78459)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78458)[0m 2021-12-10 14:42:58,523	INFO torch_policy.py:148 -- TorchPolicy (worker=3) running on 0.25 GPU(s).
[2m[36m(pid=78454)[0m 2021-12-10 14:42:58,523	INFO torch_policy.py:148 -- TorchPolicy (worker=1) running on 0.25 GPU(s).
[2m[36m(pid=78459)[0m 2021-12-10 14:42:58,523	INFO torch_policy.py:148 -- TorchPolicy (worker=2) running on 0.25 GPU(s).
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78458)[0m INFO:mlagents_

[2m[36m(pid=78458)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78459)[0m [INFO] Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78458)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78458)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78454)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78454)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78454)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78459)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78458)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78459)[0m [INFO] Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78458)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78458)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78454)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78454)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78454)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78459)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78458)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78459)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78458)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78459)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78459)[0m [INFO] Connected new brain: SoccerTwos?team=0
[2m[36m(pid=78458)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78458)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78454)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78454)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=78454)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=78454)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=78457)[0m 2021-12-10 14:43:24,293	INFO torch_policy.py:148 -- TorchPolicy (worker=local) running on 0.25 GPU(s).
[2m[36m(pid=78457)[0m 2021-12-10 14:43:26,715	INFO rollout_worker.py:1199 -- Built policy map: {'main': <ray.rllib.policy.policy_template.PPOTorchPolicy object at 0x7f835027fbe0>}
[2m[36m(pid=78457)[0m 2021-12-10 14:43:26,715	INFO rollout_worker.py:1200 -- Built preprocessor map: {'main': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7f835021c640>}
[2m[36m(pid=78457)[0m 2021-12-10 14:43:26,715	INFO rollout_worker.py:583 -- Built filter map: {'main': <ray.rllib.utils.filter.NoFilter object at 0x7f835041b070>}
[2m[36m(pid=78457)[0m 2021-12-10 14:43:26,718	INFO trainable.py:101 -- Trainable.setup took 32.071 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=78454)[0m 2021-12-10 14:43:26,742	INFO rollout_worker.py:723 -- Generating sample batch of size 13

Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 31968
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.09728485505489814
    agent_0_total_ball_to_goal_speed_reward_mean: 0.032428285018299384
    agent_0_total_ball_to_goal_speed_reward_min: 0.0
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_14-45-20
  done: false
  episode_len_mean: 309.0
  episode_media: {}
  episode_reward_max: 0.11174192541708239
  episode_reward_mean: -0.2056598825681235
  episode_reward_min: -0.8217776877294751
  episodes_this_iter: 3
  episodes_total: 3
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.2
          cur_lr: 5.0000000000000016e-05
          entropy: 3.269151714324951
          entropy_coeff: 0.0
          kl: 0.026881471447646618
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,1,113.363,7992,-0.20566,0.111742,-0.821778,309


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 63936
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.24623458681754068
    agent_0_total_ball_to_goal_speed_reward_mean: -0.02060156918429559
    agent_0_total_ball_to_goal_speed_reward_min: -0.31071368664981575
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_14-47-12
  done: false
  episode_len_mean: 836.7142857142857
  episode_media: {}
  episode_reward_max: 0.2290840970655967
  episode_reward_mean: -0.4992387167970335
  episode_reward_min: -1.9025360697938178
  episodes_this_iter: 11
  episodes_total: 14
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 3.2449067277908323
          entropy_coeff: 0.0
          kl:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,2,225.579,15984,-0.499239,0.229084,-1.90254,836.714


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 95904
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.24623458681754068
    agent_0_total_ball_to_goal_speed_reward_mean: -0.025806652258028372
    agent_0_total_ball_to_goal_speed_reward_min: -0.31071368664981575
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_14-49-03
  done: false
  episode_len_mean: 775.2380952380952
  episode_media: {}
  episode_reward_max: 0.4459429308364111
  episode_reward_mean: -0.43640088748686007
  episode_reward_min: -1.9025360697938178
  episodes_this_iter: 7
  episodes_total: 21
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 3.219842350959778
          entropy_coeff: 0.0
          kl:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,3,336.872,23976,-0.436401,0.445943,-1.90254,775.238


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 127872
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.3513794674076383
    agent_0_total_ball_to_goal_speed_reward_mean: 0.010707858583108246
    agent_0_total_ball_to_goal_speed_reward_min: -0.31071368664981575
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_14-51-03
  done: false
  episode_len_mean: 715.5
  episode_media: {}
  episode_reward_max: 0.8576535264391476
  episode_reward_mean: -0.28574559436406305
  episode_reward_min: -1.9025360697938178
  episodes_this_iter: 17
  episodes_total: 38
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 3.1988379011154175
          entropy_coeff: 0.0
          kl: 0.01969517

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,4,456.923,31968,-0.285746,0.857654,-1.90254,715.5


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 159840
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.3805253480926408
    agent_0_total_ball_to_goal_speed_reward_mean: 0.026408206733916688
    agent_0_total_ball_to_goal_speed_reward_min: -0.31071368664981575
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_14-53-04
  done: false
  episode_len_mean: 718.8085106382979
  episode_media: {}
  episode_reward_max: 0.8576535264391476
  episode_reward_mean: -0.31118105012600555
  episode_reward_min: -1.9025360697938178
  episodes_this_iter: 9
  episodes_total: 47
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 3.1771447057724
          entropy_coeff: 0.0
          kl: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,5,578.004,39960,-0.311181,0.857654,-1.90254,718.809


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 191808
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.3805253480926408
    agent_0_total_ball_to_goal_speed_reward_mean: 0.023656723958151764
    agent_0_total_ball_to_goal_speed_reward_min: -0.31527870872636465
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_14-55-05
  done: false
  episode_len_mean: 695.9032258064516
  episode_media: {}
  episode_reward_max: 0.8576535264391476
  episode_reward_mean: -0.26271791876276573
  episode_reward_min: -1.9025360697938178
  episodes_this_iter: 15
  episodes_total: 62
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.3
          cur_lr: 5.0000000000000016e-05
          entropy: 3.1517552518844605
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,6,698.077,47952,-0.262718,0.857654,-1.90254,695.903


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 223776
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.4531517158426204
    agent_0_total_ball_to_goal_speed_reward_mean: 0.02331241706010743
    agent_0_total_ball_to_goal_speed_reward_min: -0.31527870872636465
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_14-56-57
  done: false
  episode_len_mean: 708.5277777777778
  episode_media: {}
  episode_reward_max: 0.9217838478646715
  episode_reward_mean: -0.24505779723393928
  episode_reward_min: -1.9025360697938178
  episodes_this_iter: 10
  episodes_total: 72
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 3.143507083892822
          entropy_coeff: 0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,7,810.741,55944,-0.245058,0.921784,-1.90254,708.528


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 255744
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.4531517158426204
    agent_0_total_ball_to_goal_speed_reward_mean: 0.016839161085087036
    agent_0_total_ball_to_goal_speed_reward_min: -0.47843032964343485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_14-58-49
  done: false
  episode_len_mean: 693.5421686746988
  episode_media: {}
  episode_reward_max: 0.9217838478646715
  episode_reward_mean: -0.23651238361561533
  episode_reward_min: -1.9025360697938178
  episodes_this_iter: 11
  episodes_total: 83
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 3.1195417165756227
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,8,921.924,63936,-0.236512,0.921784,-1.90254,693.542


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 287712
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5265592951549064
    agent_0_total_ball_to_goal_speed_reward_mean: 0.031999395938698154
    agent_0_total_ball_to_goal_speed_reward_min: -0.47843032964343485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-00-41
  done: false
  episode_len_mean: 719.3473684210526
  episode_media: {}
  episode_reward_max: 0.9217838478646715
  episode_reward_mean: -0.2100777564140678
  episode_reward_min: -1.9025360697938178
  episodes_this_iter: 12
  episodes_total: 95
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 3.1010805931091308
          entropy_coeff: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,9,1034.45,71928,-0.210078,0.921784,-1.90254,719.347


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 319680
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7109492516866475
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03790285296298854
    agent_0_total_ball_to_goal_speed_reward_min: -0.47843032964343485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-02-32
  done: false
  episode_len_mean: 740.08
  episode_media: {}
  episode_reward_max: 0.9217838478646715
  episode_reward_mean: -0.21492309447811503
  episode_reward_min: -1.9025360697938178
  episodes_this_iter: 7
  episodes_total: 102
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 3.082317168235779
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,10,1144.92,79920,-0.214923,0.921784,-1.90254,740.08


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 351648
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7109492516866475
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05085254738605124
    agent_0_total_ball_to_goal_speed_reward_min: -0.47843032964343485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-04-22
  done: false
  episode_len_mean: 725.98
  episode_media: {}
  episode_reward_max: 0.9217838478646715
  episode_reward_mean: -0.1372994479067455
  episode_reward_min: -1.5974366302777525
  episodes_this_iter: 10
  episodes_total: 112
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 3.062376546859741
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,11,1255.44,87912,-0.137299,0.921784,-1.59744,725.98


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 383616
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7109492516866475
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06523087917057578
    agent_0_total_ball_to_goal_speed_reward_min: -0.47843032964343485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-06-18
  done: false
  episode_len_mean: 705.38
  episode_media: {}
  episode_reward_max: 0.9217838478646715
  episode_reward_mean: -0.13733928360578104
  episode_reward_min: -1.677296420066651
  episodes_this_iter: 16
  episodes_total: 128
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 3.0411830492019654
          entropy_coeff: 0.0
         



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,12,1370.64,95904,-0.137339,0.921784,-1.6773,705.38


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 415584
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7109492516866475
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07393095056803207
    agent_0_total_ball_to_goal_speed_reward_min: -0.47843032964343485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-08-22
  done: false
  episode_len_mean: 712.24
  episode_media: {}
  episode_reward_max: 0.9217838478646715
  episode_reward_mean: -0.13989980881760383
  episode_reward_min: -1.677296420066651
  episodes_this_iter: 10
  episodes_total: 138
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 3.024588822364807
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,13,1493.12,103896,-0.1399,0.921784,-1.6773,712.24


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 447552
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7109492516866475
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08458801065545567
    agent_0_total_ball_to_goal_speed_reward_min: -0.47843032964343485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-10-22
  done: false
  episode_len_mean: 697.5
  episode_media: {}
  episode_reward_max: 0.9217838478646715
  episode_reward_mean: -0.061765902312208555
  episode_reward_min: -1.677296420066651
  episodes_this_iter: 17
  episodes_total: 155
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.997699625968933
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,14,1613.32,111888,-0.0617659,0.921784,-1.6773,697.5


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 479520
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7109492516866475
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09219621929332224
    agent_0_total_ball_to_goal_speed_reward_min: -0.47843032964343485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-12-20
  done: false
  episode_len_mean: 634.45
  episode_media: {}
  episode_reward_max: 0.8805780482511579
  episode_reward_mean: -0.06512175199083113
  episode_reward_min: -1.677296420066651
  episodes_this_iter: 18
  episodes_total: 173
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.9718074111938475
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,15,1731.25,119880,-0.0651218,0.880578,-1.6773,634.45


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 511488
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5997550873148613
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0902458090489127
    agent_0_total_ball_to_goal_speed_reward_min: -0.4573456022342602
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-14-11
  done: false
  episode_len_mean: 516.68
  episode_media: {}
  episode_reward_max: 0.8805780482511579
  episode_reward_mean: -0.03697233697707178
  episode_reward_min: -1.677296420066651
  episodes_this_iter: 26
  episodes_total: 199
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.957978724479675
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,16,1842.1,127872,-0.0369723,0.880578,-1.6773,516.68


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 543456
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5997550873148613
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07957117672390206
    agent_0_total_ball_to_goal_speed_reward_min: -0.5230697811764543
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-16-00
  done: false
  episode_len_mean: 424.77
  episode_media: {}
  episode_reward_max: 0.8910615750749931
  episode_reward_mean: -0.08892658800960236
  episode_reward_min: -3.0122025516785746
  episodes_this_iter: 27
  episodes_total: 226
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.941058054924011
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,17,1951.54,135864,-0.0889266,0.891062,-3.0122,424.77


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 575424
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5997550873148613
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06413194695101297
    agent_0_total_ball_to_goal_speed_reward_min: -0.5992973890040367
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-17-50
  done: false
  episode_len_mean: 303.25
  episode_media: {}
  episode_reward_max: 1.2189818340163963
  episode_reward_mean: -0.10471627265551225
  episode_reward_min: -3.0122025516785746
  episodes_this_iter: 36
  episodes_total: 262
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.909072337150574
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,18,2061.05,143856,-0.104716,1.21898,-3.0122,303.25


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 607392
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7329101350694548
    agent_0_total_ball_to_goal_speed_reward_mean: 0.052136528826457536
    agent_0_total_ball_to_goal_speed_reward_min: -0.5992973890040367
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-19-39
  done: false
  episode_len_mean: 250.42
  episode_media: {}
  episode_reward_max: 1.2189818340163963
  episode_reward_mean: -0.10664419155304226
  episode_reward_min: -3.0122025516785746
  episodes_this_iter: 35
  episodes_total: 297
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.8785661487579346
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,19,2170.07,151848,-0.106644,1.21898,-3.0122,250.42


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 639360
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7329101350694548
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06404511748613363
    agent_0_total_ball_to_goal_speed_reward_min: -0.9029321359531005
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-21-33
  done: false
  episode_len_mean: 237.08
  episode_media: {}
  episode_reward_max: 1.2189818340163963
  episode_reward_mean: -0.045687319179986724
  episode_reward_min: -1.9944709211565634
  episodes_this_iter: 29
  episodes_total: 326
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.8709294233322145
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,20,2284.34,159840,-0.0456873,1.21898,-1.99447,237.08


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 671328
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7329101350694548
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09923041798108077
    agent_0_total_ball_to_goal_speed_reward_min: -0.9029321359531005
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-23-28
  done: false
  episode_len_mean: 231.12
  episode_media: {}
  episode_reward_max: 0.8103790845533876
  episode_reward_mean: -0.004615616593443983
  episode_reward_min: -1.4792459759102874
  episodes_this_iter: 43
  episodes_total: 369
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.817068691253662
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,21,2398.5,167832,-0.00461562,0.810379,-1.47925,231.12


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 703296
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8437270039645854
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10688457741459388
    agent_0_total_ball_to_goal_speed_reward_min: -0.839807942963664
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-25-25
  done: false
  episode_len_mean: 185.03
  episode_media: {}
  episode_reward_max: 0.932574707075625
  episode_reward_mean: 0.06730597864202278
  episode_reward_min: -1.205223379807408
  episodes_this_iter: 59
  episodes_total: 428
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.7610202007293703
          entropy_coeff: 0.0
          kl:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,22,2515.48,175824,0.067306,0.932575,-1.20522,185.03


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 735264
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8437270039645854
    agent_0_total_ball_to_goal_speed_reward_mean: 0.1424227799617964
    agent_0_total_ball_to_goal_speed_reward_min: -0.24260932429047677
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-27-17
  done: false
  episode_len_mean: 126.7
  episode_media: {}
  episode_reward_max: 0.932574707075625
  episode_reward_mean: 0.08937245620887933
  episode_reward_min: -2.246940910265278
  episodes_this_iter: 53
  episodes_total: 481
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.7700326137542723
          entropy_coeff: 0.0
          kl:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,23,2627.25,183816,0.0893725,0.932575,-2.24694,126.7


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 767232
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7183933097989852
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10761680694016858
    agent_0_total_ball_to_goal_speed_reward_min: -0.49826279149159725
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-29-15
  done: false
  episode_len_mean: 126.39
  episode_media: {}
  episode_reward_max: 0.9985830055862761
  episode_reward_mean: 0.0743004111433906
  episode_reward_min: -2.246940910265278
  episodes_this_iter: 64
  episodes_total: 545
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.6890694246292113
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,24,2745.28,191808,0.0743004,0.998583,-2.24694,126.39


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 799200
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5636678695025463
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06322482055418936
    agent_0_total_ball_to_goal_speed_reward_min: -0.6075340994450533
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-31-11
  done: false
  episode_len_mean: 130.31
  episode_media: {}
  episode_reward_max: 1.1427115969242434
  episode_reward_mean: 0.07149776998470184
  episode_reward_min: -2.3121816392503156
  episodes_this_iter: 53
  episodes_total: 598
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.7203041400909425
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,25,2861.75,199800,0.0714978,1.14271,-2.31218,130.31


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 831168
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.771068064432019
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05607933240581566
    agent_0_total_ball_to_goal_speed_reward_min: -0.6075340994450533
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-33-03
  done: false
  episode_len_mean: 141.98
  episode_media: {}
  episode_reward_max: 0.870708301059421
  episode_reward_mean: -0.0053111612048622955
  episode_reward_min: -2.3121816392503156
  episodes_this_iter: 58
  episodes_total: 656
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.6730389347076415
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,26,2973.88,207792,-0.00531116,0.870708,-2.31218,141.98


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 863136
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.771068064432019
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09243537420106619
    agent_0_total_ball_to_goal_speed_reward_min: -0.5736994562224772
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-34-55
  done: false
  episode_len_mean: 150.96
  episode_media: {}
  episode_reward_max: 1.0793398397042826
  episode_reward_mean: 0.03803548496732454
  episode_reward_min: -1.4597340509679881
  episodes_this_iter: 50
  episodes_total: 706
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.68263383102417
          entropy_coeff: 0.0
          kl:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,27,3085.82,215784,0.0380355,1.07934,-1.45973,150.96


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 895104
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6921799614884523
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08776757250402464
    agent_0_total_ball_to_goal_speed_reward_min: -0.4663155306085428
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-36-53
  done: false
  episode_len_mean: 156.64
  episode_media: {}
  episode_reward_max: 1.1405092772458008
  episode_reward_mean: 0.08461530655729672
  episode_reward_min: -1.398619475200428
  episodes_this_iter: 53
  episodes_total: 759
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.641651237487793
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,28,3203.43,223776,0.0846153,1.14051,-1.39862,156.64


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 927072
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6921799614884523
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04065200979685645
    agent_0_total_ball_to_goal_speed_reward_min: -0.4067520247798571
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-38-54
  done: false
  episode_len_mean: 137.88
  episode_media: {}
  episode_reward_max: 1.1405092772458008
  episode_reward_mean: 0.05425221089074629
  episode_reward_min: -1.849520038727472
  episodes_this_iter: 60
  episodes_total: 819
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5858803968429567
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,29,3324.27,231768,0.0542522,1.14051,-1.84952,137.88


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 959040
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7297111617201453
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08672498819057514
    agent_0_total_ball_to_goal_speed_reward_min: -0.4067520247798571
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-41-11
  done: false
  episode_len_mean: 118.9
  episode_media: {}
  episode_reward_max: 1.083146464167112
  episode_reward_mean: 0.16698721565737557
  episode_reward_min: -1.2925118274570742
  episodes_this_iter: 73
  episodes_total: 892
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.5601398258209227
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,30,3460.84,239760,0.166987,1.08315,-1.29251,118.9


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 991008
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6722871554640288
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07844392079448068
    agent_0_total_ball_to_goal_speed_reward_min: -0.4076640462382134
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-43-14
  done: false
  episode_len_mean: 111.91
  episode_media: {}
  episode_reward_max: 1.2448599792498334
  episode_reward_mean: 0.08397222226237165
  episode_reward_min: -1.1965523428205336
  episodes_this_iter: 62
  episodes_total: 954
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.533053985595703
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,31,3583.63,247752,0.0839722,1.24486,-1.19655,111.91


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1022976
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8603634830596627
    agent_0_total_ball_to_goal_speed_reward_mean: 0.11519017689362414
    agent_0_total_ball_to_goal_speed_reward_min: -0.6226283454767704
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-45-13
  done: false
  episode_len_mean: 115.17
  episode_media: {}
  episode_reward_max: 1.2448599792498334
  episode_reward_mean: 0.12563660910577978
  episode_reward_min: -1.4534806654779606
  episodes_this_iter: 88
  episodes_total: 1042
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.475295141220093
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,32,3703.31,255744,0.125637,1.24486,-1.45348,115.17


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1054944
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9075239175113855
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07793838753751972
    agent_0_total_ball_to_goal_speed_reward_min: -0.6226283454767704
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-47-15
  done: false
  episode_len_mean: 95.37
  episode_media: {}
  episode_reward_max: 1.0938584137918874
  episode_reward_mean: 0.15600843002202866
  episode_reward_min: -1.7521982655652095
  episodes_this_iter: 68
  episodes_total: 1110
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.490173532485962
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,33,3825.39,263736,0.156008,1.09386,-1.7522,95.37


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1086912
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.826072581358564
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08095938037158802
    agent_0_total_ball_to_goal_speed_reward_min: -0.48254579581934476
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-49-15
  done: false
  episode_len_mean: 125.53
  episode_media: {}
  episode_reward_max: 0.9789646511915058
  episode_reward_mean: 0.09574572523461806
  episode_reward_min: -1.7521982655652095
  episodes_this_iter: 67
  episodes_total: 1177
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.469194839477539
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,34,3945.14,271728,0.0957457,0.978965,-1.7522,125.53


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1118880
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7127832576066483
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0505140052837348
    agent_0_total_ball_to_goal_speed_reward_min: -0.6497480008230457
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-51-17
  done: false
  episode_len_mean: 112.94
  episode_media: {}
  episode_reward_max: 0.9789646511915058
  episode_reward_mean: 0.09164105369288848
  episode_reward_min: -2.7613674356939875
  episodes_this_iter: 59
  episodes_total: 1236
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.4817667541503905
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,35,4066.6,279720,0.0916411,0.978965,-2.76137,112.94


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1150848
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8407122768172964
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05141835836763148
    agent_0_total_ball_to_goal_speed_reward_min: -0.7500155363549612
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-53-19
  done: false
  episode_len_mean: 135.76
  episode_media: {}
  episode_reward_max: 0.9081537808887484
  episode_reward_mean: -0.01101272360908278
  episode_reward_min: -2.7613674356939875
  episodes_this_iter: 68
  episodes_total: 1304
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.4089397478103636
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,36,4188.61,287712,-0.0110127,0.908154,-2.76137,135.76


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1182816
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6898266692408466
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06113971660986652
    agent_0_total_ball_to_goal_speed_reward_min: -0.996474616418056
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-55-20
  done: false
  episode_len_mean: 103.45
  episode_media: {}
  episode_reward_max: 1.0801172352851902
  episode_reward_mean: 0.12498581169654858
  episode_reward_min: -1.7101148783258373
  episodes_this_iter: 84
  episodes_total: 1388
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.4030255756378174
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,37,4309.34,295704,0.124986,1.08012,-1.71011,103.45


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1214784
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1443359136909788
    agent_0_total_ball_to_goal_speed_reward_mean: 0.061976191918488646
    agent_0_total_ball_to_goal_speed_reward_min: -0.901941717027658
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-57-21
  done: false
  episode_len_mean: 99.17
  episode_media: {}
  episode_reward_max: 0.9076263812960603
  episode_reward_mean: 0.10892567334591831
  episode_reward_min: -1.7314555403387946
  episodes_this_iter: 75
  episodes_total: 1463
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3960438346862794
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,38,4430.8,303696,0.108926,0.907626,-1.73146,99.17


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1246752
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9352401932772264
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07587433068737347
    agent_0_total_ball_to_goal_speed_reward_min: -0.46651236914002364
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_15-59-23
  done: false
  episode_len_mean: 81.14
  episode_media: {}
  episode_reward_max: 1.1843438301918083
  episode_reward_mean: 0.14454232642139636
  episode_reward_min: -1.4198967138648237
  episodes_this_iter: 95
  episodes_total: 1558
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.3414823656082153
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,39,4552.5,311688,0.144542,1.18434,-1.4199,81.14


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1278720
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6449469878352555
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0668259015370779
    agent_0_total_ball_to_goal_speed_reward_min: -0.4442072219104475
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-01-27
  done: false
  episode_len_mean: 102.71
  episode_media: {}
  episode_reward_max: 1.1410214104210334
  episode_reward_mean: 0.08278330938032435
  episode_reward_min: -1.5052915843752352
  episodes_this_iter: 77
  episodes_total: 1635
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.348832069396973
          entropy_coeff: 0.0
          



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,40,4676.87,319680,0.0827833,1.14102,-1.50529,102.71


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1310688
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7761991944739829
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07577451324795631
    agent_0_total_ball_to_goal_speed_reward_min: -0.6801506588093417
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-03-35
  done: false
  episode_len_mean: 99.57
  episode_media: {}
  episode_reward_max: 1.1820370588028553
  episode_reward_mean: 0.08344590693747328
  episode_reward_min: -1.6902636413953984
  episodes_this_iter: 76
  episodes_total: 1711
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.296562340736389
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,41,4801.06,327672,0.0834459,1.18204,-1.69026,99.57


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1342656
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8645251886638065
    agent_0_total_ball_to_goal_speed_reward_mean: 0.030862373906532646
    agent_0_total_ball_to_goal_speed_reward_min: -0.6801506588093417
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-05-38
  done: false
  episode_len_mean: 100.95
  episode_media: {}
  episode_reward_max: 1.111144243489612
  episode_reward_mean: 0.14122505745607739
  episode_reward_min: -1.4655674838262396
  episodes_this_iter: 88
  episodes_total: 1799
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.2706633129119873
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,42,4923.8,335664,0.141225,1.11114,-1.46557,100.95


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1374624
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8955261795651522
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09884649885227416
    agent_0_total_ball_to_goal_speed_reward_min: -0.5297169058166485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-07-44
  done: false
  episode_len_mean: 89.98
  episode_media: {}
  episode_reward_max: 1.1385759329751544
  episode_reward_mean: 0.15074370216796498
  episode_reward_min: -1.604489454676513
  episodes_this_iter: 85
  episodes_total: 1884
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.2314757795333864
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,43,5049.91,343656,0.150744,1.13858,-1.60449,89.98


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1406592
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8251652392629054
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08227691157480557
    agent_0_total_ball_to_goal_speed_reward_min: -0.5855389140574512
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-09-48
  done: false
  episode_len_mean: 91.99
  episode_media: {}
  episode_reward_max: 1.446415523769038
  episode_reward_mean: 0.15296505020812848
  episode_reward_min: -0.8821487414322287
  episodes_this_iter: 85
  episodes_total: 1969
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.2143423261642456
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,44,5173.93,351648,0.152965,1.44642,-0.882149,91.99


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1438560
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8369842400470648
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08452115167932459
    agent_0_total_ball_to_goal_speed_reward_min: -0.46950123808874633
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-11-52
  done: false
  episode_len_mean: 88.45
  episode_media: {}
  episode_reward_max: 1.3852194845423527
  episode_reward_mean: 0.11745030889760052
  episode_reward_min: -1.2496425219134633
  episodes_this_iter: 93
  episodes_total: 2062
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.19003227519989
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,45,5297.78,359640,0.11745,1.38522,-1.24964,88.45


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1470528
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8764844755697462
    agent_0_total_ball_to_goal_speed_reward_mean: 0.028975687606940092
    agent_0_total_ball_to_goal_speed_reward_min: -0.5305499179853442
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-13-56
  done: false
  episode_len_mean: 85.19
  episode_media: {}
  episode_reward_max: 1.1473576946456197
  episode_reward_mean: 0.09852802874356226
  episode_reward_min: -1.2496937486618414
  episodes_this_iter: 90
  episodes_total: 2152
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.1561039190292357
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,46,5422.09,367632,0.098528,1.14736,-1.24969,85.19


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1502496
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9361416048733182
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09451021870619837
    agent_0_total_ball_to_goal_speed_reward_min: -0.5028830102570857
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-15-52
  done: false
  episode_len_mean: 80.03738317757009
  episode_media: {}
  episode_reward_max: 1.22152306165669
  episode_reward_mean: 0.06124683481608093
  episode_reward_min: -0.8966834528142031
  episodes_this_iter: 107
  episodes_total: 2259
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.103758729457855
          entropy_coeff: 0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,47,5537.63,375624,0.0612468,1.22152,-0.896683,80.0374


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1534464
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5916107636508037
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07078356821607915
    agent_0_total_ball_to_goal_speed_reward_min: -0.9146442166436733
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-17-48
  done: false
  episode_len_mean: 82.11
  episode_media: {}
  episode_reward_max: 1.166731619782749
  episode_reward_mean: 0.10730161410267261
  episode_reward_min: -1.3835459917602568
  episodes_this_iter: 99
  episodes_total: 2358
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.1015295910835268
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,48,5653.33,383616,0.107302,1.16673,-1.38355,82.11


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1566432
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8214846991260323
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09509526634553477
    agent_0_total_ball_to_goal_speed_reward_min: -0.5353433647658811
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-19-46
  done: false
  episode_len_mean: 75.86538461538461
  episode_media: {}
  episode_reward_max: 1.083836813844317
  episode_reward_mean: 0.18570159756475524
  episode_reward_min: -0.7240535729755913
  episodes_this_iter: 104
  episodes_total: 2462
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.0710823574066164
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,49,5771.68,391608,0.185702,1.08384,-0.724054,75.8654


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1598400
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7905157311914756
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09599287327450129
    agent_0_total_ball_to_goal_speed_reward_min: -0.44234565829501704
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-21-47
  done: false
  episode_len_mean: 84.38
  episode_media: {}
  episode_reward_max: 1.2871376906518812
  episode_reward_mean: 0.08242733873646163
  episode_reward_min: -1.2789570367172307
  episodes_this_iter: 93
  episodes_total: 2555
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.0506978244781493
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,50,5891.97,399600,0.0824273,1.28714,-1.27896,84.38


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1630368
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7132817220343212
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09218601984770787
    agent_0_total_ball_to_goal_speed_reward_min: -0.4791945859691698
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-23-53
  done: false
  episode_len_mean: 70.61538461538461
  episode_media: {}
  episode_reward_max: 1.4215164563728044
  episode_reward_mean: 0.1845456734207123
  episode_reward_min: -0.770935093753462
  episodes_this_iter: 104
  episodes_total: 2659
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 2.008533737182617
          entropy_coeff: 0.0



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,51,6018.42,407592,0.184546,1.42152,-0.770935,70.6154


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1662336
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9505892526127697
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07239659390781451
    agent_0_total_ball_to_goal_speed_reward_min: -0.4500886229114827
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-25-51
  done: false
  episode_len_mean: 78.0925925925926
  episode_media: {}
  episode_reward_max: 1.059681941404515
  episode_reward_mean: 0.21431818837135155
  episode_reward_min: -0.9008795587103222
  episodes_this_iter: 108
  episodes_total: 2767
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 1.9811372237205505
          entropy_coeff: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,52,6135.43,415584,0.214318,1.05968,-0.90088,78.0926


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1694304
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.701456817370238
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04421958418214598
    agent_0_total_ball_to_goal_speed_reward_min: -0.4589041312411541
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-27-47
  done: false
  episode_len_mean: 71.76146788990826
  episode_media: {}
  episode_reward_max: 1.3391532891456097
  episode_reward_mean: 0.09202048484575627
  episode_reward_min: -1.4771380528389066
  episodes_this_iter: 109
  episodes_total: 2876
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 1.96769904756546
          entropy_coeff: 0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,53,6251.01,423576,0.0920205,1.33915,-1.47714,71.7615


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1726272
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7607122204640752
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06637117223714527
    agent_0_total_ball_to_goal_speed_reward_min: -0.6006938647860443
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-29-42
  done: false
  episode_len_mean: 78.45045045045045
  episode_media: {}
  episode_reward_max: 1.0308981585720627
  episode_reward_mean: 0.1309366030712793
  episode_reward_min: -1.1021441458781278
  episodes_this_iter: 111
  episodes_total: 2987
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 1.9297397966384888
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,54,6366.3,431568,0.130937,1.0309,-1.10214,78.4505


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1758240
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0682727172607236
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10047655023606371
    agent_0_total_ball_to_goal_speed_reward_min: -0.40671965141658817
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-31-38
  done: false
  episode_len_mean: 78.73
  episode_media: {}
  episode_reward_max: 1.684876410034489
  episode_reward_mean: 0.1342696307345293
  episode_reward_min: -1.382387296210911
  episodes_this_iter: 98
  episodes_total: 3085
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 1.926538782596588
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,55,6482.27,439560,0.13427,1.68488,-1.38239,78.73


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1790208
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7770735575765354
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0897513262537277
    agent_0_total_ball_to_goal_speed_reward_min: -0.49839428224668014
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-33-35
  done: false
  episode_len_mean: 75.39423076923077
  episode_media: {}
  episode_reward_max: 0.9587070330552738
  episode_reward_mean: 0.1837474118220133
  episode_reward_min: -1.07755418437084
  episodes_this_iter: 104
  episodes_total: 3189
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 1.9104894261360168
          entropy_coeff: 0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,56,6598.85,447552,0.183747,0.958707,-1.07755,75.3942


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1822176
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1123519633450805
    agent_0_total_ball_to_goal_speed_reward_mean: 0.12957073450769876
    agent_0_total_ball_to_goal_speed_reward_min: -0.5523995905914408
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-35-30
  done: false
  episode_len_mean: 66.50833333333334
  episode_media: {}
  episode_reward_max: 1.2397863658210748
  episode_reward_mean: 0.20881047798246552
  episode_reward_min: -0.7196357383419096
  episodes_this_iter: 120
  episodes_total: 3309
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.0000000000000016e-05
          entropy: 1.8630052576065064
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,57,6714.06,455544,0.20881,1.23979,-0.719636,66.5083


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1854144
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9216974411280054
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05379508004018465
    agent_0_total_ball_to_goal_speed_reward_min: -0.43415142214591357
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-37-25
  done: false
  episode_len_mean: 65.89075630252101
  episode_media: {}
  episode_reward_max: 1.3350613994549523
  episode_reward_mean: 0.14245552521759808
  episode_reward_min: -0.9808505287796434
  episodes_this_iter: 119
  episodes_total: 3428
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.8435912094116211
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,58,6829.25,463536,0.142456,1.33506,-0.980851,65.8908


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1886112
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6115712107206795
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07273859829933711
    agent_0_total_ball_to_goal_speed_reward_min: -0.5518049554450536
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-39-21
  done: false
  episode_len_mean: 76.41666666666667
  episode_media: {}
  episode_reward_max: 1.3507456886387041
  episode_reward_mean: 0.13293031434767286
  episode_reward_min: -2.1643593717615333
  episodes_this_iter: 108
  episodes_total: 3536
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.852336555480957
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,59,6944.95,471528,0.13293,1.35075,-2.16436,76.4167


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1918080
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9290114218846391
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10783750746387419
    agent_0_total_ball_to_goal_speed_reward_min: -0.44824232203519154
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-41-17
  done: false
  episode_len_mean: 84.95
  episode_media: {}
  episode_reward_max: 0.892296127735387
  episode_reward_mean: 0.1770057621608309
  episode_reward_min: -1.2621554646725976
  episodes_this_iter: 87
  episodes_total: 3623
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.8665865740776062
          entropy_coeff: 0.0
          kl: 0.0170507

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,60,7060.92,479520,0.177006,0.892296,-1.26216,84.95


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1950048
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6554977551978007
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0952274350315007
    agent_0_total_ball_to_goal_speed_reward_min: -0.736226201134685
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-43-14
  done: false
  episode_len_mean: 62.94615384615385
  episode_media: {}
  episode_reward_max: 1.4920755336684832
  episode_reward_mean: 0.1942600641168125
  episode_reward_min: -1.2461782622053577
  episodes_this_iter: 130
  episodes_total: 3753
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.7828270049095154
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,61,7177.65,487512,0.19426,1.49208,-1.24618,62.9462


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 1982016
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6952220429104756
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05157388166700274
    agent_0_total_ball_to_goal_speed_reward_min: -0.5074896316315114
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-45-11
  done: false
  episode_len_mean: 76.70476190476191
  episode_media: {}
  episode_reward_max: 1.225722871174944
  episode_reward_mean: 0.121832029386902
  episode_reward_min: -0.8575380232318381
  episodes_this_iter: 105
  episodes_total: 3858
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.8065487594604492
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,62,7294.57,495504,0.121832,1.22572,-0.857538,76.7048


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2013984
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5992188318296012
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06704836639009541
    agent_0_total_ball_to_goal_speed_reward_min: -0.540416371316864
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-47-07
  done: false
  episode_len_mean: 73.61
  episode_media: {}
  episode_reward_max: 1.8179328220432884
  episode_reward_mean: 0.11460813498577803
  episode_reward_min: -0.8798360333558142
  episodes_this_iter: 98
  episodes_total: 3956
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.7912957062721253
          entropy_coeff: 0.0
          kl: 0.0169920

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,63,7410.7,503496,0.114608,1.81793,-0.879836,73.61


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2045952
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7149858811064288
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06649446206271768
    agent_0_total_ball_to_goal_speed_reward_min: -0.6148296250783024
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-49-03
  done: false
  episode_len_mean: 76.90598290598291
  episode_media: {}
  episode_reward_max: 0.9039982635452924
  episode_reward_mean: 0.131043666729228
  episode_reward_min: -1.141567373170491
  episodes_this_iter: 117
  episodes_total: 4073
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.7431410474777222
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,64,7526.49,511488,0.131044,0.903998,-1.14157,76.906


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2077920
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0010207064096965
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10488424690084526
    agent_0_total_ball_to_goal_speed_reward_min: -0.44333721315322383
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-51-00
  done: false
  episode_len_mean: 71.41904761904762
  episode_media: {}
  episode_reward_max: 1.53405735614608
  episode_reward_mean: 0.1895554291318477
  episode_reward_min: -1.3868085524526466
  episodes_this_iter: 105
  episodes_total: 4178
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.7454988770484925
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,65,7643.63,519480,0.189555,1.53406,-1.38681,71.419


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2109888
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6945263536345533
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04663782895544311
    agent_0_total_ball_to_goal_speed_reward_min: -0.9609292149454739
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-52-57
  done: false
  episode_len_mean: 73.86842105263158
  episode_media: {}
  episode_reward_max: 1.7266261375826877
  episode_reward_mean: 0.18946862041547313
  episode_reward_min: -0.7546274310293408
  episodes_this_iter: 114
  episodes_total: 4292
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.7145503845214844
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,66,7759.93,527472,0.189469,1.72663,-0.754627,73.8684


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2141856
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6863749495909451
    agent_0_total_ball_to_goal_speed_reward_mean: 0.11949636645070814
    agent_0_total_ball_to_goal_speed_reward_min: -0.4110060089114687
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-54-57
  done: false
  episode_len_mean: 65.81739130434782
  episode_media: {}
  episode_reward_max: 0.9903700204626835
  episode_reward_mean: 0.2322745680603212
  episode_reward_min: -0.6513431950957456
  episodes_this_iter: 115
  episodes_total: 4407
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.6983989930152894
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,67,7880.27,535464,0.232275,0.99037,-0.651343,65.8174


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2173824
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8216064828973567
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05767911267305805
    agent_0_total_ball_to_goal_speed_reward_min: -0.47911500557504094
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-56-56
  done: false
  episode_len_mean: 69.8
  episode_media: {}
  episode_reward_max: 1.5931609534429332
  episode_reward_mean: 0.1798005750341291
  episode_reward_min: -0.8762566280715562
  episodes_this_iter: 120
  episodes_total: 4527
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.650914800643921
          entropy_coeff: 0.0
          kl: 0.0175105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,68,7999.25,543456,0.179801,1.59316,-0.876257,69.8


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2205792
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8046757656354032
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07289764140679898
    agent_0_total_ball_to_goal_speed_reward_min: -0.6477575988261309
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_16-58-56
  done: false
  episode_len_mean: 70.33035714285714
  episode_media: {}
  episode_reward_max: 0.9982254310134299
  episode_reward_mean: 0.10136896286330775
  episode_reward_min: -1.3536710656797046
  episodes_this_iter: 112
  episodes_total: 4639
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.6631945371627808
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,69,8119.3,551448,0.101369,0.998225,-1.35367,70.3304


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2237760
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7656718263356665
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07049847784024894
    agent_0_total_ball_to_goal_speed_reward_min: -0.631113928126711
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-00-47
  done: false
  episode_len_mean: 58.956204379562045
  episode_media: {}
  episode_reward_max: 1.176101146070894
  episode_reward_mean: 0.1431458141989732
  episode_reward_min: -1.1504010522708725
  episodes_this_iter: 137
  episodes_total: 4776
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.6274938740730285
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,70,8229.63,559440,0.143146,1.1761,-1.1504,58.9562


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2269728
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0384697137147507
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07322098948818032
    agent_0_total_ball_to_goal_speed_reward_min: -0.5554887561666577
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-02-34
  done: false
  episode_len_mean: 67.44067796610169
  episode_media: {}
  episode_reward_max: 1.4949260437103362
  episode_reward_mean: 0.12717444546221787
  episode_reward_min: -0.8200673632733375
  episodes_this_iter: 118
  episodes_total: 4894
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.6198496165275573
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,71,8336.8,567432,0.127174,1.49493,-0.820067,67.4407


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2301696
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9311667890829887
    agent_0_total_ball_to_goal_speed_reward_mean: 0.12021737842870994
    agent_0_total_ball_to_goal_speed_reward_min: -0.5135365240231461
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-04-22
  done: false
  episode_len_mean: 67.84482758620689
  episode_media: {}
  episode_reward_max: 1.3590615017661505
  episode_reward_mean: 0.22644989573987903
  episode_reward_min: -0.8422497188985663
  episodes_this_iter: 116
  episodes_total: 5010
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.6123317399024963
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,72,8444.84,575424,0.22645,1.35906,-0.84225,67.8448


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2333664
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6272453088020657
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06246518916994534
    agent_0_total_ball_to_goal_speed_reward_min: -0.4706380212250524
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-06-10
  done: false
  episode_len_mean: 63.13178294573643
  episode_media: {}
  episode_reward_max: 1.6263300698037169
  episode_reward_mean: 0.1817974002944584
  episode_reward_min: -0.7012193846129426
  episodes_this_iter: 129
  episodes_total: 5139
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.5619364938735962
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,73,8552.48,583416,0.181797,1.62633,-0.701219,63.1318


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2365632
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7806689783967866
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07849367477360512
    agent_0_total_ball_to_goal_speed_reward_min: -0.7690576487615872
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-07-57
  done: false
  episode_len_mean: 66.34920634920636
  episode_media: {}
  episode_reward_max: 1.573710192906209
  episode_reward_mean: 0.19405182346862282
  episode_reward_min: -1.1035403630046696
  episodes_this_iter: 126
  episodes_total: 5265
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.5557377834320067
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,74,8659.96,591408,0.194052,1.57371,-1.10354,66.3492


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2397600
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6752005910448636
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0858080175459635
    agent_0_total_ball_to_goal_speed_reward_min: -0.4991844454228357
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-09-45
  done: false
  episode_len_mean: 60.508620689655174
  episode_media: {}
  episode_reward_max: 1.337661218389803
  episode_reward_mean: 0.15632320927758672
  episode_reward_min: -0.724750403690456
  episodes_this_iter: 116
  episodes_total: 5381
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.5602878770828248
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,75,8767.51,599400,0.156323,1.33766,-0.72475,60.5086


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2429568
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6244994564588717
    agent_0_total_ball_to_goal_speed_reward_mean: 0.042286169453356164
    agent_0_total_ball_to_goal_speed_reward_min: -0.527391821962743
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-11-32
  done: false
  episode_len_mean: 66.6
  episode_media: {}
  episode_reward_max: 1.3007612971484135
  episode_reward_mean: 0.12556531100753254
  episode_reward_min: -1.3895068303432039
  episodes_this_iter: 130
  episodes_total: 5511
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.5414108414649963
          entropy_coeff: 0.0
          kl: 0.017726

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,76,8875.04,607392,0.125565,1.30076,-1.38951,66.6


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2461536
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5567291453465654
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0463624661766184
    agent_0_total_ball_to_goal_speed_reward_min: -0.623449814311938
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-13-21
  done: false
  episode_len_mean: 68.16239316239316
  episode_media: {}
  episode_reward_max: 1.2688243169265556
  episode_reward_mean: 0.1808904050085819
  episode_reward_min: -0.7340871143641876
  episodes_this_iter: 117
  episodes_total: 5628
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.5292282447814942
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,77,8983.7,615384,0.18089,1.26882,-0.734087,68.1624


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2493504
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8806337085924287
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08448331085455667
    agent_0_total_ball_to_goal_speed_reward_min: -0.49003916671135866
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-15-08
  done: false
  episode_len_mean: 68.28828828828829
  episode_media: {}
  episode_reward_max: 1.4284477045604538
  episode_reward_mean: 0.1355000624635489
  episode_reward_min: -1.0376270860724321
  episodes_this_iter: 111
  episodes_total: 5739
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.5273326835632324
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,78,9090.81,623376,0.1355,1.42845,-1.03763,68.2883


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2525472
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7742725896258551
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07187341607174028
    agent_0_total_ball_to_goal_speed_reward_min: -0.4096211599732369
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-16-56
  done: false
  episode_len_mean: 65.078125
  episode_media: {}
  episode_reward_max: 1.1261909191341108
  episode_reward_mean: 0.08369399886513583
  episode_reward_min: -2.3028217246121514
  episodes_this_iter: 128
  episodes_total: 5867
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.4835426363945008
          entropy_coeff: 0.0
          kl: 0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,79,9198.68,631368,0.083694,1.12619,-2.30282,65.0781


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2557440
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8775574285456815
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05311528668411017
    agent_0_total_ball_to_goal_speed_reward_min: -0.5621129535882151
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-18-43
  done: false
  episode_len_mean: 68.25688073394495
  episode_media: {}
  episode_reward_max: 1.2975994141234986
  episode_reward_mean: 0.18256554624155255
  episode_reward_min: -0.9835274093186364
  episodes_this_iter: 109
  episodes_total: 5976
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.5068264598846435
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,80,9305.88,639360,0.182566,1.2976,-0.983527,68.2569


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2589408
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7663033423560734
    agent_0_total_ball_to_goal_speed_reward_mean: 0.11134273225502875
    agent_0_total_ball_to_goal_speed_reward_min: -0.5086194844333414
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-20-35
  done: false
  episode_len_mean: 83.11428571428571
  episode_media: {}
  episode_reward_max: 1.2215746296029155
  episode_reward_mean: 0.10949127484528667
  episode_reward_min: -1.2810714720175767
  episodes_this_iter: 105
  episodes_total: 6081
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.476920433998108
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,81,9417.5,647352,0.109491,1.22157,-1.28107,83.1143


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2621376
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8808459887872638
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0847868122959946
    agent_0_total_ball_to_goal_speed_reward_min: -0.511096616643566
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-22-32
  done: false
  episode_len_mean: 63.75
  episode_media: {}
  episode_reward_max: 1.9113629322213637
  episode_reward_mean: 0.19288822625148377
  episode_reward_min: -1.1924591950222774
  episodes_this_iter: 116
  episodes_total: 6197
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.4527760000228882
          entropy_coeff: 0.0
          kl: 0.0184944

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,82,9534.22,655344,0.192888,1.91136,-1.19246,63.75


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2653344
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7902362927755694
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0994308953784384
    agent_0_total_ball_to_goal_speed_reward_min: -0.597218598331109
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-24-31
  done: false
  episode_len_mean: 71.30578512396694
  episode_media: {}
  episode_reward_max: 1.8038305608144714
  episode_reward_mean: 0.17164334448386048
  episode_reward_min: -1.2796727569437438
  episodes_this_iter: 121
  episodes_total: 6318
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.410855402469635
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,83,9653.2,663336,0.171643,1.80383,-1.27967,71.3058


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2685312
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7532074041715551
    agent_0_total_ball_to_goal_speed_reward_mean: 0.054903416876578716
    agent_0_total_ball_to_goal_speed_reward_min: -0.6148743922539741
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-26-31
  done: false
  episode_len_mean: 63.66393442622951
  episode_media: {}
  episode_reward_max: 1.12436315283147
  episode_reward_mean: 0.1775599086094911
  episode_reward_min: -2.0047568696219304
  episodes_this_iter: 122
  episodes_total: 6440
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.4118429350852966
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,84,9773.07,671328,0.17756,1.12436,-2.00476,63.6639


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2717280
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1294547003918656
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04013662286261264
    agent_0_total_ball_to_goal_speed_reward_min: -0.4725515470184552
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-28-29
  done: false
  episode_len_mean: 66.09243697478992
  episode_media: {}
  episode_reward_max: 1.3877897907865235
  episode_reward_mean: 0.06635200140375368
  episode_reward_min: -2.116088920107957
  episodes_this_iter: 119
  episodes_total: 6559
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.415392084121704
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,85,9891.29,679320,0.066352,1.38779,-2.11609,66.0924


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2749248
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8107777028247214
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05398287730303317
    agent_0_total_ball_to_goal_speed_reward_min: -0.5438919686015948
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-30-28
  done: false
  episode_len_mean: 66.98360655737704
  episode_media: {}
  episode_reward_max: 2.0883168022061986
  episode_reward_mean: 0.16795984961867036
  episode_reward_min: -0.6797232512618043
  episodes_this_iter: 122
  episodes_total: 6681
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3917043418884278
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,86,10010.2,687312,0.16796,2.08832,-0.679723,66.9836


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2781216
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7423491677030012
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06741390819308613
    agent_0_total_ball_to_goal_speed_reward_min: -0.5101432588508793
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-32-17
  done: false
  episode_len_mean: 62.44186046511628
  episode_media: {}
  episode_reward_max: 1.2745703180398573
  episode_reward_mean: 0.12182343446949909
  episode_reward_min: -0.8768871924073549
  episodes_this_iter: 129
  episodes_total: 6810
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3984122443199158
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,87,10119.5,695304,0.121823,1.27457,-0.876887,62.4419


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2813184
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5698416611172333
    agent_0_total_ball_to_goal_speed_reward_mean: 0.020598959095855764
    agent_0_total_ball_to_goal_speed_reward_min: -0.44062930732549843
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-34-07
  done: false
  episode_len_mean: 64.38842975206612
  episode_media: {}
  episode_reward_max: 1.3577301407061215
  episode_reward_mean: 0.10406015053994946
  episode_reward_min: -0.9732196254988419
  episodes_this_iter: 121
  episodes_total: 6931
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3786593909263611
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,88,10228.6,703296,0.10406,1.35773,-0.97322,64.3884


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2845152
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6266504387552385
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0012589107989407326
    agent_0_total_ball_to_goal_speed_reward_min: -0.5649126601116935
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-35-56
  done: false
  episode_len_mean: 65.95121951219512
  episode_media: {}
  episode_reward_max: 1.1384236718063487
  episode_reward_mean: 0.05620513201594232
  episode_reward_min: -1.2527234368266673
  episodes_this_iter: 123
  episodes_total: 7054
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3709570870399475
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,89,10337.5,711288,0.0562051,1.13842,-1.25272,65.9512


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2877120
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6883251992792014
    agent_0_total_ball_to_goal_speed_reward_mean: 0.045665982000129036
    agent_0_total_ball_to_goal_speed_reward_min: -0.5019694452614394
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-37-45
  done: false
  episode_len_mean: 67.19834710743801
  episode_media: {}
  episode_reward_max: 1.0632652593836327
  episode_reward_mean: 0.14723810639784804
  episode_reward_min: -1.0688858976215763
  episodes_this_iter: 121
  episodes_total: 7175
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3696611924171447
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,90,10446.3,719280,0.147238,1.06327,-1.06889,67.1983


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2909088
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0410841195185276
    agent_0_total_ball_to_goal_speed_reward_mean: 0.045095656799332355
    agent_0_total_ball_to_goal_speed_reward_min: -0.6995114162039189
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-39-33
  done: false
  episode_len_mean: 60.484848484848484
  episode_media: {}
  episode_reward_max: 1.2514483935660645
  episode_reward_mean: 0.12739258921371907
  episode_reward_min: -0.8715094499557463
  episodes_this_iter: 132
  episodes_total: 7307
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3344507007598876
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,91,10555,727272,0.127393,1.25145,-0.871509,60.4848


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2941056
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9196459838099259
    agent_0_total_ball_to_goal_speed_reward_mean: 0.046852840003101075
    agent_0_total_ball_to_goal_speed_reward_min: -0.5174433624187584
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-41-22
  done: false
  episode_len_mean: 62.627906976744185
  episode_media: {}
  episode_reward_max: 1.1426231307013919
  episode_reward_mean: 0.1279207468632079
  episode_reward_min: -1.0577098907669193
  episodes_this_iter: 129
  episodes_total: 7436
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3298298058509828
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,92,10664.1,735264,0.127921,1.14262,-1.05771,62.6279


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 2973024
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9071327693001323
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06031973983900761
    agent_0_total_ball_to_goal_speed_reward_min: -0.5278717652351532
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-43-18
  done: false
  episode_len_mean: 57.94074074074074
  episode_media: {}
  episode_reward_max: 1.317339995194423
  episode_reward_mean: 0.14643903639442044
  episode_reward_min: -0.9605656377342437
  episodes_this_iter: 135
  episodes_total: 7571
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.306732982635498
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,93,10779.5,743256,0.146439,1.31734,-0.960566,57.9407


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3004992
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7783183671436192
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07139557501304454
    agent_0_total_ball_to_goal_speed_reward_min: -0.6103194207599072
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-45-09
  done: false
  episode_len_mean: 70.91891891891892
  episode_media: {}
  episode_reward_max: 1.000093805861642
  episode_reward_mean: 0.15731187730992136
  episode_reward_min: -1.792686280007794
  episodes_this_iter: 111
  episodes_total: 7682
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3195203881263733
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,94,10890.4,751248,0.157312,1.00009,-1.79269,70.9189


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3036960
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.922195938388625
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03485793043435664
    agent_0_total_ball_to_goal_speed_reward_min: -0.4764053206018642
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-46-56
  done: false
  episode_len_mean: 59.82945736434109
  episode_media: {}
  episode_reward_max: 1.391740781022865
  episode_reward_mean: 0.12310494669129561
  episode_reward_min: -0.7696138810858542
  episodes_this_iter: 129
  episodes_total: 7811
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3066504168510438
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,95,10997.8,759240,0.123105,1.39174,-0.769614,59.8295


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3068928
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7606224820633488
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06570379627924154
    agent_0_total_ball_to_goal_speed_reward_min: -0.5587545394407071
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-48-43
  done: false
  episode_len_mean: 68.70689655172414
  episode_media: {}
  episode_reward_max: 1.1337868817356718
  episode_reward_mean: 0.12497730298418325
  episode_reward_min: -0.8602995237648909
  episodes_this_iter: 116
  episodes_total: 7927
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.3056403789520263
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,96,11104.3,767232,0.124977,1.13379,-0.8603,68.7069


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3100896
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6505934053232142
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05290895234677252
    agent_0_total_ball_to_goal_speed_reward_min: -0.8627242925831732
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-50-40
  done: false
  episode_len_mean: 60.35251798561151
  episode_media: {}
  episode_reward_max: 1.4058856975473653
  episode_reward_mean: 0.0703918008246611
  episode_reward_min: -1.2991710471456521
  episodes_this_iter: 139
  episodes_total: 8066
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.2674468545913695
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,97,11221,775224,0.0703918,1.40589,-1.29917,60.3525


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3132864
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7046516554968187
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07677119472500854
    agent_0_total_ball_to_goal_speed_reward_min: -0.555630515239379
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-52-39
  done: false
  episode_len_mean: 64.73553719008264
  episode_media: {}
  episode_reward_max: 1.4615731218219985
  episode_reward_mean: 0.09689006941667531
  episode_reward_min: -0.9633652087758652
  episodes_this_iter: 121
  episodes_total: 8187
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.2854437818527222
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,98,11340.5,783216,0.0968901,1.46157,-0.963365,64.7355


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3164832
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6428444714156551
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06302961187988568
    agent_0_total_ball_to_goal_speed_reward_min: -0.5949702998467258
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-54-38
  done: false
  episode_len_mean: 68.21551724137932
  episode_media: {}
  episode_reward_max: 1.3106478078208543
  episode_reward_mean: 0.15914707741764944
  episode_reward_min: -1.618421218164855
  episodes_this_iter: 116
  episodes_total: 8303
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.2715581774711608
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,99,11459.5,791208,0.159147,1.31065,-1.61842,68.2155


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3196800
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8141692441187846
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07052173102877636
    agent_0_total_ball_to_goal_speed_reward_min: -0.5228192492843641
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-56-44
  done: false
  episode_len_mean: 68.25
  episode_media: {}
  episode_reward_max: 1.316043358709459
  episode_reward_mean: 0.14054376401239344
  episode_reward_min: -0.7919931649536087
  episodes_this_iter: 120
  episodes_total: 8423
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.2665585927963257
          entropy_coeff: 0.0
          kl: 0.019636

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,100,11584.7,799200,0.140544,1.31604,-0.791993,68.25


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3228768
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9967084909857761
    agent_0_total_ball_to_goal_speed_reward_mean: 0.11008542231118633
    agent_0_total_ball_to_goal_speed_reward_min: -0.6365688841485116
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_17-58-46
  done: false
  episode_len_mean: 64.21428571428571
  episode_media: {}
  episode_reward_max: 1.204123072214534
  episode_reward_mean: 0.17038046181145577
  episode_reward_min: -1.0464100157587461
  episodes_this_iter: 126
  episodes_total: 8549
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.2337596192359925
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,101,11706.7,807192,0.17038,1.20412,-1.04641,64.2143


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3260736
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.803026656586438
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09577625782313234
    agent_0_total_ball_to_goal_speed_reward_min: -0.4893499654059666
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-00-49
  done: false
  episode_len_mean: 67.71186440677967
  episode_media: {}
  episode_reward_max: 1.1532751880913539
  episode_reward_mean: 0.20182069651534498
  episode_reward_min: -0.770997883525347
  episodes_this_iter: 118
  episodes_total: 8667
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.2359133787155152
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,102,11829.7,815184,0.201821,1.15328,-0.770998,67.7119


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3292704
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6511178315842253
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07218932364322465
    agent_0_total_ball_to_goal_speed_reward_min: -0.6376830160893316
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-02-54
  done: false
  episode_len_mean: 61.333333333333336
  episode_media: {}
  episode_reward_max: 1.1843121413153455
  episode_reward_mean: 0.09042617025057789
  episode_reward_min: -1.0654398942712353
  episodes_this_iter: 132
  episodes_total: 8799
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.2232996201515198
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,103,11954.6,823176,0.0904262,1.18431,-1.06544,61.3333


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3324672
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7033286257927854
    agent_0_total_ball_to_goal_speed_reward_mean: 0.12753006062842046
    agent_0_total_ball_to_goal_speed_reward_min: -0.541084951221246
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-05-01
  done: false
  episode_len_mean: 58.45522388059702
  episode_media: {}
  episode_reward_max: 1.3122695644020999
  episode_reward_mean: 0.19877120810321128
  episode_reward_min: -1.1060699723292244
  episodes_this_iter: 134
  episodes_total: 8933
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 0.675
          cur_lr: 5.0000000000000016e-05
          entropy: 1.2081178503036498
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,104,12081.1,831168,0.198771,1.31227,-1.10607,58.4552


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3356640
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7167311323976864
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0584655467453551
    agent_0_total_ball_to_goal_speed_reward_min: -0.5893076745308586
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-06-58
  done: false
  episode_len_mean: 64.0078125
  episode_media: {}
  episode_reward_max: 1.3673666012260415
  episode_reward_mean: 0.08899709561145021
  episode_reward_min: -0.8206778223613416
  episodes_this_iter: 128
  episodes_total: 9061
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1990696949958801
          entropy_coeff: 0.0
          kl: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,105,12198,839160,0.0889971,1.36737,-0.820678,64.0078


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3388608
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8161510755477155
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06193870822182444
    agent_0_total_ball_to_goal_speed_reward_min: -0.48316861877068457
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-08-53
  done: false
  episode_len_mean: 60.485074626865675
  episode_media: {}
  episode_reward_max: 1.2844458503992553
  episode_reward_mean: 0.14018725832273268
  episode_reward_min: -1.0542327922287766
  episodes_this_iter: 134
  episodes_total: 9195
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1888385882377626
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,106,12313.3,847152,0.140187,1.28445,-1.05423,60.4851


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3420576
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6872892125293583
    agent_0_total_ball_to_goal_speed_reward_mean: 0.056433897699001104
    agent_0_total_ball_to_goal_speed_reward_min: -0.7177490435605799
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-10-53
  done: false
  episode_len_mean: 58.07971014492754
  episode_media: {}
  episode_reward_max: 1.3131459635491725
  episode_reward_mean: 0.2336179452695623
  episode_reward_min: -0.7886277076419894
  episodes_this_iter: 138
  episodes_total: 9333
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1713146090507507
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,107,12432.7,855144,0.233618,1.31315,-0.788628,58.0797


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3452544
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7993871542977937
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05133040880666167
    agent_0_total_ball_to_goal_speed_reward_min: -0.6614967345133934
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-13-02
  done: false
  episode_len_mean: 63.81818181818182
  episode_media: {}
  episode_reward_max: 1.1724851535985934
  episode_reward_mean: 0.16528693902199154
  episode_reward_min: -0.860466361592777
  episodes_this_iter: 121
  episodes_total: 9454
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1736805033683777
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,108,12562.4,863136,0.165287,1.17249,-0.860466,63.8182


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3484512
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5725499070648025
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06809327345696904
    agent_0_total_ball_to_goal_speed_reward_min: -0.49656319920356506
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-15-04
  done: false
  episode_len_mean: 58.03007518796993
  episode_media: {}
  episode_reward_max: 1.6149382322658696
  episode_reward_mean: 0.1816706251554928
  episode_reward_min: -1.0498719951824622
  episodes_this_iter: 133
  episodes_total: 9587
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1669876804351806
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,109,12683.3,871128,0.181671,1.61494,-1.04987,58.0301


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3516480
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8867804583109175
    agent_0_total_ball_to_goal_speed_reward_mean: 0.032410075832713395
    agent_0_total_ball_to_goal_speed_reward_min: -0.561167294111984
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-17-13
  done: false
  episode_len_mean: 67.9
  episode_media: {}
  episode_reward_max: 1.2171458520392133
  episode_reward_mean: 0.11109611792890671
  episode_reward_min: -1.112198573270864
  episodes_this_iter: 120
  episodes_total: 9707
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1523093552589416
          entropy_coeff: 0.0
          kl: 0.016036

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,110,12812.7,879120,0.111096,1.21715,-1.1122,67.9


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3548448
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6803698602589896
    agent_0_total_ball_to_goal_speed_reward_mean: 0.01984940943161353
    agent_0_total_ball_to_goal_speed_reward_min: -0.7682692076709826
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-19-28
  done: false
  episode_len_mean: 65.70866141732283
  episode_media: {}
  episode_reward_max: 1.039981297210029
  episode_reward_mean: 0.11074360786136138
  episode_reward_min: -1.0575086457169518
  episodes_this_iter: 127
  episodes_total: 9834
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1515671877861022
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,111,12947.5,887112,0.110744,1.03998,-1.05751,65.7087


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3580416
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9067326480795882
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03779216159519647
    agent_0_total_ball_to_goal_speed_reward_min: -0.6867222644998803
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-21-39
  done: false
  episode_len_mean: 61.38461538461539
  episode_media: {}
  episode_reward_max: 1.3025802129435835
  episode_reward_mean: 0.101450647034486
  episode_reward_min: -1.1544452876169478
  episodes_this_iter: 130
  episodes_total: 9964
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1269352831840516
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,112,13078.5,895104,0.101451,1.30258,-1.15445,61.3846


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3612384
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8375869479858243
    agent_0_total_ball_to_goal_speed_reward_mean: 0.051512837370230345
    agent_0_total_ball_to_goal_speed_reward_min: -0.6165458448523694
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-23-32
  done: false
  episode_len_mean: 58.6
  episode_media: {}
  episode_reward_max: 1.0677034873711602
  episode_reward_mean: 0.13673786615841096
  episode_reward_min: -0.7080986364452908
  episodes_this_iter: 125
  episodes_total: 10089
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1452933974266053
          entropy_coeff: 0.0
          kl: 0.015

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,113,13191.7,903096,0.136738,1.0677,-0.708099,58.6


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3644352
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.584647825222127
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07619437644928319
    agent_0_total_ball_to_goal_speed_reward_min: -0.534172400097925
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-25-23
  done: false
  episode_len_mean: 71.76271186440678
  episode_media: {}
  episode_reward_max: 1.1226496302206659
  episode_reward_mean: 0.15357805288490545
  episode_reward_min: -1.0014191728217505
  episodes_this_iter: 118
  episodes_total: 10207
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1360234410762786
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,114,13302.1,911088,0.153578,1.12265,-1.00142,71.7627


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3676320
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.733176324798962
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04870433648078965
    agent_0_total_ball_to_goal_speed_reward_min: -0.5167185948641608
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-27-12
  done: false
  episode_len_mean: 62.46456692913386
  episode_media: {}
  episode_reward_max: 1.4388210720849541
  episode_reward_mean: 0.15657375695803294
  episode_reward_min: -0.7703815110544041
  episodes_this_iter: 127
  episodes_total: 10334
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1170676257610321
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,115,13411.5,919080,0.156574,1.43882,-0.770382,62.4646


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3708288
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6740717871605364
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05863994648488178
    agent_0_total_ball_to_goal_speed_reward_min: -0.47005424435847837
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-29-03
  done: false
  episode_len_mean: 66.91803278688525
  episode_media: {}
  episode_reward_max: 1.1613332031847743
  episode_reward_mean: 0.13565806840829234
  episode_reward_min: -1.0183128734799207
  episodes_this_iter: 122
  episodes_total: 10456
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1241020534038544
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,116,13521.6,927072,0.135658,1.16133,-1.01831,66.918


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3740256
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7823726274683942
    agent_0_total_ball_to_goal_speed_reward_mean: 0.036968979256190346
    agent_0_total_ball_to_goal_speed_reward_min: -0.4774146646121558
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-30-53
  done: false
  episode_len_mean: 65.272
  episode_media: {}
  episode_reward_max: 1.5700825495914028
  episode_reward_mean: 0.10786721095489729
  episode_reward_min: -1.3995170396800278
  episodes_this_iter: 125
  episodes_total: 10581
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1100629892349243
          entropy_coeff: 0.0
          kl: 0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,117,13631.8,935064,0.107867,1.57008,-1.39952,65.272


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3772224
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7836398943729417
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06140064028467777
    agent_0_total_ball_to_goal_speed_reward_min: -0.49821152429851384
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-32-44
  done: false
  episode_len_mean: 66.13513513513513
  episode_media: {}
  episode_reward_max: 1.0609339845536092
  episode_reward_mean: 0.12551532863780443
  episode_reward_min: -1.1691293568791785
  episodes_this_iter: 111
  episodes_total: 10692
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1120358693599701
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,118,13742.5,943056,0.125515,1.06093,-1.16913,66.1351


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3804192
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6803772323660875
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07334324943128477
    agent_0_total_ball_to_goal_speed_reward_min: -0.6184585883854359
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-34-38
  done: false
  episode_len_mean: 70.89830508474576
  episode_media: {}
  episode_reward_max: 1.3309943190420523
  episode_reward_mean: 0.16063193233739267
  episode_reward_min: -1.5116432765484578
  episodes_this_iter: 118
  episodes_total: 10810
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.1059112265110016
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,119,13856.5,951048,0.160632,1.33099,-1.51164,70.8983


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3836160
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8266688179355995
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09413815270688708
    agent_0_total_ball_to_goal_speed_reward_min: -0.550347076280207
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-36-27
  done: false
  episode_len_mean: 61.2406015037594
  episode_media: {}
  episode_reward_max: 1.3408445845665384
  episode_reward_mean: 0.17983873619548577
  episode_reward_min: -0.8637865883515234
  episodes_this_iter: 133
  episodes_total: 10943
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0869268724918366
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,120,13965.7,959040,0.179839,1.34084,-0.863787,61.2406


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3868128
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1222425344149207
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04141913201832468
    agent_0_total_ball_to_goal_speed_reward_min: -0.5591888909530688
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-38-17
  done: false
  episode_len_mean: 58.6015037593985
  episode_media: {}
  episode_reward_max: 1.2391533786762459
  episode_reward_mean: 0.15242612331182806
  episode_reward_min: -0.6915213750534259
  episodes_this_iter: 133
  episodes_total: 11076
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0704778552055358
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,121,14075.4,967032,0.152426,1.23915,-0.691521,58.6015


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3900096
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0390415726013256
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04612495318091399
    agent_0_total_ball_to_goal_speed_reward_min: -0.577238863567454
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-40-06
  done: false
  episode_len_mean: 63.52755905511811
  episode_media: {}
  episode_reward_max: 1.666204390852804
  episode_reward_mean: 0.1572957012187473
  episode_reward_min: -1.112571293122341
  episodes_this_iter: 127
  episodes_total: 11203
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0895512702465058
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,122,14184.7,975024,0.157296,1.6662,-1.11257,63.5276


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3932064
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1392695051302617
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04103615483026074
    agent_0_total_ball_to_goal_speed_reward_min: -0.41971556306644764
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-41-56
  done: false
  episode_len_mean: 61.388429752066116
  episode_media: {}
  episode_reward_max: 1.1450497442682583
  episode_reward_mean: 0.15204903578051004
  episode_reward_min: -0.8458802594030316
  episodes_this_iter: 121
  episodes_total: 11324
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0763094055652618
          entropy_coeff: 0.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,123,14294.7,983016,0.152049,1.14505,-0.84588,61.3884


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3964032
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7216800295863763
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0045644552999777235
    agent_0_total_ball_to_goal_speed_reward_min: -0.6721854539445473
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-43-46
  done: false
  episode_len_mean: 61.1865671641791
  episode_media: {}
  episode_reward_max: 1.7576271726478483
  episode_reward_mean: 0.10378554791933432
  episode_reward_min: -1.1937567336852526
  episodes_this_iter: 134
  episodes_total: 11458
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0548890993595124
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,124,14404.3,991008,0.103786,1.75763,-1.19376,61.1866


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 3996000
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9030095621426198
    agent_0_total_ball_to_goal_speed_reward_mean: 0.036676209002041174
    agent_0_total_ball_to_goal_speed_reward_min: -0.5701945513121283
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-45-36
  done: false
  episode_len_mean: 69.52100840336135
  episode_media: {}
  episode_reward_max: 1.5449748091431825
  episode_reward_mean: 0.20527433975271783
  episode_reward_min: -1.101592478963973
  episodes_this_iter: 119
  episodes_total: 11577
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0588390712738036
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,125,14514,999000,0.205274,1.54497,-1.10159,69.521


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4027968
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9224967740429831
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04596292608012641
    agent_0_total_ball_to_goal_speed_reward_min: -0.8699287095302569
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-47-26
  done: false
  episode_len_mean: 59.525925925925925
  episode_media: {}
  episode_reward_max: 1.3047436479374945
  episode_reward_mean: 0.10767035378829415
  episode_reward_min: -1.5743264941572654
  episodes_this_iter: 135
  episodes_total: 11712
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0208970391750336
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,126,14624.2,1006992,0.10767,1.30474,-1.57433,59.5259


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4059936
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7959358918012277
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06711519287014195
    agent_0_total_ball_to_goal_speed_reward_min: -0.4997564263984353
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-49-15
  done: false
  episode_len_mean: 59.05263157894737
  episode_media: {}
  episode_reward_max: 1.142848980955466
  episode_reward_mean: 0.20942797896230542
  episode_reward_min: -0.7362951578483556
  episodes_this_iter: 133
  episodes_total: 11845
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0261011469364165
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,127,14733.7,1014984,0.209428,1.14285,-0.736295,59.0526


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4091904
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.75850147112681
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03698992021794353
    agent_0_total_ball_to_goal_speed_reward_min: -0.43519860452115666
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-51-06
  done: false
  episode_len_mean: 59.833333333333336
  episode_media: {}
  episode_reward_max: 1.7628729855978857
  episode_reward_mean: 0.22451611366399826
  episode_reward_min: -0.8006872775054112
  episodes_this_iter: 132
  episodes_total: 11977
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0262014424800874
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,128,14843.8,1022976,0.224516,1.76287,-0.800687,59.8333


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4123872
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8034148597908781
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04805855828980595
    agent_0_total_ball_to_goal_speed_reward_min: -0.7467031798967539
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-52-56
  done: false
  episode_len_mean: 56.39007092198582
  episode_media: {}
  episode_reward_max: 1.392047091403895
  episode_reward_mean: 0.17638112945991927
  episode_reward_min: -0.7839617042449896
  episodes_this_iter: 141
  episodes_total: 12118
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0161363394260408
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,129,14954.2,1030968,0.176381,1.39205,-0.783962,56.3901


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4155840
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7564786712346702
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05452266938046401
    agent_0_total_ball_to_goal_speed_reward_min: -0.43297553783686005
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-54-46
  done: false
  episode_len_mean: 61.03076923076923
  episode_media: {}
  episode_reward_max: 0.952203098186156
  episode_reward_mean: 0.12300093676183524
  episode_reward_min: -0.9734857828017026
  episodes_this_iter: 130
  episodes_total: 12248
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.0181136496067047
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,130,15064.3,1038960,0.123001,0.952203,-0.973486,61.0308


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4187808
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1689948869972528
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06980782790103843
    agent_0_total_ball_to_goal_speed_reward_min: -0.5572142298320111
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-56-37
  done: false
  episode_len_mean: 68.056
  episode_media: {}
  episode_reward_max: 1.5150448998195154
  episode_reward_mean: 0.18005529904292072
  episode_reward_min: -0.9410025116202256
  episodes_this_iter: 125
  episodes_total: 12373
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9987958297729492
          entropy_coeff: 0.0
          kl: 0.01

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,131,15175,1046952,0.180055,1.51504,-0.941003,68.056


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4219776
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8404579931441528
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07761028781547419
    agent_0_total_ball_to_goal_speed_reward_min: -0.42328877638222534
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_18-58-27
  done: false
  episode_len_mean: 60.37301587301587
  episode_media: {}
  episode_reward_max: 1.1059370925522374
  episode_reward_mean: 0.16275324573263472
  episode_reward_min: -0.9948707058647914
  episodes_this_iter: 126
  episodes_total: 12499
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 1.013537451505661
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,132,15285.4,1054944,0.162753,1.10594,-0.994871,60.373


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4251744
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.866921963570761
    agent_0_total_ball_to_goal_speed_reward_mean: 0.027538853696061894
    agent_0_total_ball_to_goal_speed_reward_min: -1.0606584677714939
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-00-18
  done: false
  episode_len_mean: 65.31451612903226
  episode_media: {}
  episode_reward_max: 1.1405347766102634
  episode_reward_mean: 0.08595914459842675
  episode_reward_min: -1.3329653215135302
  episodes_this_iter: 124
  episodes_total: 12623
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9993862042427063
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,133,15395.6,1062936,0.0859591,1.14053,-1.33297,65.3145


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4283712
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8668420817513338
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05733066811069163
    agent_0_total_ball_to_goal_speed_reward_min: -0.45961076442284465
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-02-13
  done: false
  episode_len_mean: 68.36752136752136
  episode_media: {}
  episode_reward_max: 1.3106033271267563
  episode_reward_mean: 0.04186896897139866
  episode_reward_min: -1.8305440429600623
  episodes_this_iter: 117
  episodes_total: 12740
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9821332173347473
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,134,15510.4,1070928,0.041869,1.3106,-1.83054,68.3675


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4315680
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9662746169818411
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07450950903160372
    agent_0_total_ball_to_goal_speed_reward_min: -0.6550979728609243
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-04-05
  done: false
  episode_len_mean: 58.49264705882353
  episode_media: {}
  episode_reward_max: 1.2285505319371772
  episode_reward_mean: 0.11946767546650999
  episode_reward_min: -2.0687279095397946
  episodes_this_iter: 136
  episodes_total: 12876
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9636428265571594
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,135,15622.4,1078920,0.119468,1.22855,-2.06873,58.4926


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4347648
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0285449032199026
    agent_0_total_ball_to_goal_speed_reward_mean: 0.045028508237843205
    agent_0_total_ball_to_goal_speed_reward_min: -0.47231361891261686
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-05-56
  done: false
  episode_len_mean: 59.007751937984494
  episode_media: {}
  episode_reward_max: 1.4923692740501684
  episode_reward_mean: 0.07561694459968706
  episode_reward_min: -0.9633533027969003
  episodes_this_iter: 129
  episodes_total: 13005
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9682573008537293
          entropy_coeff: 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,136,15734.2,1086912,0.0756169,1.49237,-0.963353,59.0078


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4379616
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9735350489908231
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08067939900202258
    agent_0_total_ball_to_goal_speed_reward_min: -0.570932711518739
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-07-48
  done: false
  episode_len_mean: 58.51048951048951
  episode_media: {}
  episode_reward_max: 1.3118063983775783
  episode_reward_mean: 0.07296076954166834
  episode_reward_min: -1.0121472246231311
  episodes_this_iter: 143
  episodes_total: 13148
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9439400599002838
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,137,15845.8,1094904,0.0729608,1.31181,-1.01215,58.5105


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4411584
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.829136076722297
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07868967414454872
    agent_0_total_ball_to_goal_speed_reward_min: -0.45867718188142487
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-09-40
  done: false
  episode_len_mean: 59.66428571428571
  episode_media: {}
  episode_reward_max: 1.203909551529718
  episode_reward_mean: 0.16411462159451976
  episode_reward_min: -0.8825767694094508
  episodes_this_iter: 140
  episodes_total: 13288
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9479724192619323
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,138,15957.2,1102896,0.164115,1.20391,-0.882577,59.6643


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4443552
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9607376947642031
    agent_0_total_ball_to_goal_speed_reward_mean: 0.028810684750692057
    agent_0_total_ball_to_goal_speed_reward_min: -0.8239876938720386
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-11-32
  done: false
  episode_len_mean: 63.36666666666667
  episode_media: {}
  episode_reward_max: 1.6882390697033944
  episode_reward_mean: 0.15967540159355362
  episode_reward_min: -1.2479147090565639
  episodes_this_iter: 120
  episodes_total: 13408
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9503139634132385
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,139,16069.1,1110888,0.159675,1.68824,-1.24791,63.3667


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4475520
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6529619464724171
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05486780275119442
    agent_0_total_ball_to_goal_speed_reward_min: -0.5793196537029137
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-13-24
  done: false
  episode_len_mean: 59.60769230769231
  episode_media: {}
  episode_reward_max: 1.0686203422158411
  episode_reward_mean: 0.10760208799380087
  episode_reward_min: -0.6321934925444701
  episodes_this_iter: 130
  episodes_total: 13538
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9506787567138671
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,140,16181.3,1118880,0.107602,1.06862,-0.632193,59.6077


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4507488
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.044148613489706
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07756006233235341
    agent_0_total_ball_to_goal_speed_reward_min: -0.5164316436085384
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-15-15
  done: false
  episode_len_mean: 68.51639344262296
  episode_media: {}
  episode_reward_max: 1.2055304828267586
  episode_reward_mean: 0.14931619309569416
  episode_reward_min: -0.7597401033908597
  episodes_this_iter: 122
  episodes_total: 13660
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9395087110996246
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,141,16292.8,1126872,0.149316,1.20553,-0.75974,68.5164


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4539456
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8949823264570191
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05522872895252496
    agent_0_total_ball_to_goal_speed_reward_min: -0.6383136761526738
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-17-08
  done: false
  episode_len_mean: 63.976
  episode_media: {}
  episode_reward_max: 1.1556551471271583
  episode_reward_mean: 0.06381386922675716
  episode_reward_min: -1.7221811148956507
  episodes_this_iter: 125
  episodes_total: 13785
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9330976765155792
          entropy_coeff: 0.0
          kl: 0.01

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,142,16405,1134864,0.0638139,1.15566,-1.72218,63.976


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4571424
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8153006977505984
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06095355486163123
    agent_0_total_ball_to_goal_speed_reward_min: -0.49151726880650953
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-19-00
  done: false
  episode_len_mean: 55.27210884353742
  episode_media: {}
  episode_reward_max: 1.2122657452015795
  episode_reward_mean: 0.13949328441378575
  episode_reward_min: -0.8824010870546403
  episodes_this_iter: 147
  episodes_total: 13932
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9203081512451172
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,143,16516.8,1142856,0.139493,1.21227,-0.882401,55.2721


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4603392
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7367681837430042
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03581416778599368
    agent_0_total_ball_to_goal_speed_reward_min: -0.6987140327316289
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-20-51
  done: false
  episode_len_mean: 55.53284671532847
  episode_media: {}
  episode_reward_max: 1.1649729092449368
  episode_reward_mean: 0.05217783143910553
  episode_reward_min: -1.0693106348235533
  episodes_this_iter: 137
  episodes_total: 14069
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9192282288074494
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,144,16628.5,1150848,0.0521778,1.16497,-1.06931,55.5328


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4635360
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7631750074953703
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06805216667776977
    agent_0_total_ball_to_goal_speed_reward_min: -0.49101416641708806
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-22-43
  done: false
  episode_len_mean: 61.65413533834587
  episode_media: {}
  episode_reward_max: 1.5711789972027936
  episode_reward_mean: 0.15250011430842922
  episode_reward_min: -0.7908150133956742
  episodes_this_iter: 133
  episodes_total: 14202
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9132832000255585
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,145,16739.9,1158840,0.1525,1.57118,-0.790815,61.6541


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4667328
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6291385904222239
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0708796893721373
    agent_0_total_ball_to_goal_speed_reward_min: -0.42994344662909234
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-24-38
  done: false
  episode_len_mean: 65.65322580645162
  episode_media: {}
  episode_reward_max: 1.1886859506689245
  episode_reward_mean: 0.09505860654832851
  episode_reward_min: -1.067952036299885
  episodes_this_iter: 124
  episodes_total: 14326
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.9181484847068787
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,146,16854.7,1166832,0.0950586,1.18869,-1.06795,65.6532


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4699296
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7910746160423129
    agent_0_total_ball_to_goal_speed_reward_mean: 0.049159315307973066
    agent_0_total_ball_to_goal_speed_reward_min: -0.5470298105689505
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-26-29
  done: false
  episode_len_mean: 51.711409395973156
  episode_media: {}
  episode_reward_max: 1.3011939657861276
  episode_reward_mean: 0.12158592745771152
  episode_reward_min: -0.8113106011751858
  episodes_this_iter: 149
  episodes_total: 14475
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8944336082935334
          entropy_coeff: 0.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,147,16966.1,1174824,0.121586,1.30119,-0.811311,51.7114


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4731264
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7986176237534952
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09241401864029643
    agent_0_total_ball_to_goal_speed_reward_min: -0.5080974797077141
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-28-21
  done: false
  episode_len_mean: 58.44525547445255
  episode_media: {}
  episode_reward_max: 1.0761322594365996
  episode_reward_mean: 0.17740949435667555
  episode_reward_min: -0.7988017703203489
  episodes_this_iter: 137
  episodes_total: 14612
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8987103486061097
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,148,17077.7,1182816,0.177409,1.07613,-0.798802,58.4453


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4763232
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5854327460889464
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03342265610337313
    agent_0_total_ball_to_goal_speed_reward_min: -0.5860868765274263
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-30-12
  done: false
  episode_len_mean: 59.95652173913044
  episode_media: {}
  episode_reward_max: 1.3056552301236075
  episode_reward_mean: 0.10684833443579746
  episode_reward_min: -1.0271065717749606
  episodes_this_iter: 138
  episodes_total: 14750
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8885698552131653
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,149,17189.2,1190808,0.106848,1.30566,-1.02711,59.9565


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4795200
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6255330482551342
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04813423636770907
    agent_0_total_ball_to_goal_speed_reward_min: -0.6211360083257547
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-32-04
  done: false
  episode_len_mean: 55.275862068965516
  episode_media: {}
  episode_reward_max: 1.192274961452091
  episode_reward_mean: 0.07485541376572592
  episode_reward_min: -1.4404680635107328
  episodes_this_iter: 145
  episodes_total: 14895
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8959212956428528
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,150,17300.6,1198800,0.0748554,1.19227,-1.44047,55.2759


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4827168
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5569816250634344
    agent_0_total_ball_to_goal_speed_reward_mean: 0.02058345167132713
    agent_0_total_ball_to_goal_speed_reward_min: -0.609115507313763
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-33-59
  done: false
  episode_len_mean: 54.6875
  episode_media: {}
  episode_reward_max: 1.1161450694401938
  episode_reward_mean: 0.06603840531817474
  episode_reward_min: -1.1724318423381845
  episodes_this_iter: 144
  episodes_total: 15039
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8876944477558136
          entropy_coeff: 0.0
          kl: 0.01

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,151,17415.3,1206792,0.0660384,1.11615,-1.17243,54.6875


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4859136
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9461499882070441
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09293449548010427
    agent_0_total_ball_to_goal_speed_reward_min: -0.48484474749566064
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-35-50
  done: false
  episode_len_mean: 50.58641975308642
  episode_media: {}
  episode_reward_max: 1.2823193964951223
  episode_reward_mean: 0.15438255580360516
  episode_reward_min: -1.5439711000097391
  episodes_this_iter: 162
  episodes_total: 15201
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8686763782501221
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,152,17526.5,1214784,0.154383,1.28232,-1.54397,50.5864


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4891104
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8141472829516269
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06563040176456326
    agent_0_total_ball_to_goal_speed_reward_min: -0.5217631385904233
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-37-42
  done: false
  episode_len_mean: 60.62096774193548
  episode_media: {}
  episode_reward_max: 1.613783589477755
  episode_reward_mean: 0.1928970891683129
  episode_reward_min: -0.8267691955112408
  episodes_this_iter: 124
  episodes_total: 15325
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8829178793430328
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,153,17638,1222776,0.192897,1.61378,-0.826769,60.621


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4923072
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6692432110632522
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06765593429819773
    agent_0_total_ball_to_goal_speed_reward_min: -0.47294239907405616
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-39-33
  done: false
  episode_len_mean: 66.87903225806451
  episode_media: {}
  episode_reward_max: 1.3468401497883304
  episode_reward_mean: 0.16085068108623937
  episode_reward_min: -0.6865683634208499
  episodes_this_iter: 124
  episodes_total: 15449
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8988233268260956
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,154,17749,1230768,0.160851,1.34684,-0.686568,66.879


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4955040
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8488770595716203
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06363230057435865
    agent_0_total_ball_to_goal_speed_reward_min: -0.7984378656793937
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-41-24
  done: false
  episode_len_mean: 55.395833333333336
  episode_media: {}
  episode_reward_max: 1.2860388683200061
  episode_reward_mean: 0.1692121134158433
  episode_reward_min: -1.0360632220814345
  episodes_this_iter: 144
  episodes_total: 15593
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8641545753479004
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,155,17860.2,1238760,0.169212,1.28604,-1.03606,55.3958


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 4987008
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8036225830431781
    agent_0_total_ball_to_goal_speed_reward_mean: 0.02538735183343171
    agent_0_total_ball_to_goal_speed_reward_min: -0.4724510459759658
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-43-15
  done: false
  episode_len_mean: 59.18840579710145
  episode_media: {}
  episode_reward_max: 1.4213061832660698
  episode_reward_mean: 0.15480895456993088
  episode_reward_min: -1.466087649057405
  episodes_this_iter: 138
  episodes_total: 15731
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8688217341899872
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,156,17971.4,1246752,0.154809,1.42131,-1.46609,59.1884


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5018976
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9031874942054521
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0674535737204779
    agent_0_total_ball_to_goal_speed_reward_min: -0.5119345852014928
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-45-06
  done: false
  episode_len_mean: 59.11450381679389
  episode_media: {}
  episode_reward_max: 1.4003808601398444
  episode_reward_mean: 0.16546999750519478
  episode_reward_min: -0.7691723643536852
  episodes_this_iter: 131
  episodes_total: 15862
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8594609267711639
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,157,18082.6,1254744,0.16547,1.40038,-0.769172,59.1145


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5050944
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7426507556515939
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03436435545595616
    agent_0_total_ball_to_goal_speed_reward_min: -0.6037049105406828
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-47-02
  done: false
  episode_len_mean: 54.6
  episode_media: {}
  episode_reward_max: 1.2427330722646253
  episode_reward_mean: 0.03524889203605812
  episode_reward_min: -1.2857796985244194
  episodes_this_iter: 145
  episodes_total: 16007
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8446917841434479
          entropy_coeff: 0.0
          kl: 0.0169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,158,18198.5,1262736,0.0352489,1.24273,-1.28578,54.6


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5082912
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9806477671183849
    agent_0_total_ball_to_goal_speed_reward_mean: 0.059576617487571884
    agent_0_total_ball_to_goal_speed_reward_min: -0.5860791685306435
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-48-55
  done: false
  episode_len_mean: 57.824817518248175
  episode_media: {}
  episode_reward_max: 1.0123852942679519
  episode_reward_mean: 0.1427345213171106
  episode_reward_min: -0.985519171862179
  episodes_this_iter: 137
  episodes_total: 16144
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8630993456840516
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,159,18311,1270728,0.142735,1.01239,-0.985519,57.8248


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5114880
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8903637492240831
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08298975182181163
    agent_0_total_ball_to_goal_speed_reward_min: -0.6469455526250748
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-50-48
  done: false
  episode_len_mean: 54.49333333333333
  episode_media: {}
  episode_reward_max: 1.3797029306082775
  episode_reward_mean: 0.1367204162835511
  episode_reward_min: -1.3875203429595253
  episodes_this_iter: 150
  episodes_total: 16294
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8517120635509491
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,160,18423.7,1278720,0.13672,1.3797,-1.38752,54.4933


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5146848
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7171116265926205
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06111368749769933
    agent_0_total_ball_to_goal_speed_reward_min: -0.47244989007317023
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-52-39
  done: false
  episode_len_mean: 54.76027397260274
  episode_media: {}
  episode_reward_max: 1.573248154252473
  episode_reward_mean: 0.11444439455407233
  episode_reward_min: -1.0998068947397466
  episodes_this_iter: 146
  episodes_total: 16440
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8444637024402618
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,161,18535.2,1286712,0.114444,1.57325,-1.09981,54.7603


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5178816
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8293257232635123
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07149004845258028
    agent_0_total_ball_to_goal_speed_reward_min: -0.6422122498349627
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-54-32
  done: false
  episode_len_mean: 60.68181818181818
  episode_media: {}
  episode_reward_max: 1.7156038099487785
  episode_reward_mean: 0.1262924359712171
  episode_reward_min: -0.8883336026360189
  episodes_this_iter: 132
  episodes_total: 16572
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8562452390193939
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,162,18647.5,1294704,0.126292,1.7156,-0.888334,60.6818


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5210784
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8541814520161459
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0017585422360785587
    agent_0_total_ball_to_goal_speed_reward_min: -1.0156056773014783
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-56-24
  done: false
  episode_len_mean: 65.83471074380165
  episode_media: {}
  episode_reward_max: 1.2147113680095223
  episode_reward_mean: 0.03978979454130755
  episode_reward_min: -2.3089509724564277
  episodes_this_iter: 121
  episodes_total: 16693
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8480091083049774
          entropy_coeff: 0.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,163,18759.2,1302696,0.0397898,1.21471,-2.30895,65.8347


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5242752
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.857871173449933
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06685022291256414
    agent_0_total_ball_to_goal_speed_reward_min: -0.5242668143747277
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_19-58-15
  done: false
  episode_len_mean: 57.542857142857144
  episode_media: {}
  episode_reward_max: 1.9948157146671623
  episode_reward_mean: 0.14547994148077506
  episode_reward_min: -0.8971631653932726
  episodes_this_iter: 140
  episodes_total: 16833
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8367165517807007
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,164,18870.3,1310688,0.14548,1.99482,-0.897163,57.5429


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5274720
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8744007701587533
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04033572250668777
    agent_0_total_ball_to_goal_speed_reward_min: -0.510013008665352
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-00-06
  done: false
  episode_len_mean: 55.30821917808219
  episode_media: {}
  episode_reward_max: 1.4643850587082232
  episode_reward_mean: 0.13203669778482593
  episode_reward_min: -0.9142489080212146
  episodes_this_iter: 146
  episodes_total: 16979
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8240803439617157
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,165,18981.7,1318680,0.132037,1.46439,-0.914249,55.3082


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5306688
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8717087202323531
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08069654295227477
    agent_0_total_ball_to_goal_speed_reward_min: -0.47923731364327465
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-01-58
  done: false
  episode_len_mean: 50.32911392405063
  episode_media: {}
  episode_reward_max: 1.360030804633472
  episode_reward_mean: 0.14409828596039254
  episode_reward_min: -0.8774282409992502
  episodes_this_iter: 158
  episodes_total: 17137
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8229398393630981
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,166,19093.4,1326672,0.144098,1.36003,-0.877428,50.3291


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5338656
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8685607464442053
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0332756011055099
    agent_0_total_ball_to_goal_speed_reward_min: -0.5700172631522802
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-03-49
  done: false
  episode_len_mean: 56.345588235294116
  episode_media: {}
  episode_reward_max: 1.0773754310064811
  episode_reward_mean: 0.11063890185094011
  episode_reward_min: -1.0661185289082953
  episodes_this_iter: 136
  episodes_total: 17273
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8289266309738159
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,167,19204.8,1334664,0.110639,1.07738,-1.06612,56.3456


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5370624
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8017600449327462
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07452051726840832
    agent_0_total_ball_to_goal_speed_reward_min: -0.56841471070543
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-05-42
  done: false
  episode_len_mean: 65.5725806451613
  episode_media: {}
  episode_reward_max: 1.5978400123936691
  episode_reward_mean: 0.1924972492111322
  episode_reward_min: -1.4962325773256069
  episodes_this_iter: 124
  episodes_total: 17397
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8237422738075256
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,168,19316.9,1342656,0.192497,1.59784,-1.49623,65.5726


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5402592
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9026335332943529
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07939377907636347
    agent_0_total_ball_to_goal_speed_reward_min: -0.46658095078127904
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-07-33
  done: false
  episode_len_mean: 57.26712328767123
  episode_media: {}
  episode_reward_max: 1.2954168914299595
  episode_reward_mean: 0.19706694421315007
  episode_reward_min: -0.9785034448621985
  episodes_this_iter: 146
  episodes_total: 17543
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8099859652519226
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,169,19427.9,1350648,0.197067,1.29542,-0.978503,57.2671


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5434560
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8927262098895217
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03405320177563298
    agent_0_total_ball_to_goal_speed_reward_min: -0.7849125811665506
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-09-24
  done: false
  episode_len_mean: 56.75757575757576
  episode_media: {}
  episode_reward_max: 1.2029617330835136
  episode_reward_mean: 0.13808772242005704
  episode_reward_min: -0.8754280519473268
  episodes_this_iter: 132
  episodes_total: 17675
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8138292002677917
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,170,19538.8,1358640,0.138088,1.20296,-0.875428,56.7576


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5466528
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8529094265858378
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07395070461838243
    agent_0_total_ball_to_goal_speed_reward_min: -0.5254388754200519
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-11-15
  done: false
  episode_len_mean: 58.00714285714286
  episode_media: {}
  episode_reward_max: 1.1011927430805133
  episode_reward_mean: 0.1492358405272794
  episode_reward_min: -1.1518453465374932
  episodes_this_iter: 140
  episodes_total: 17815
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8144886302947998
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,171,19650.1,1366632,0.149236,1.10119,-1.15185,58.0071


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5498496
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9172637963620932
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07850888240763862
    agent_0_total_ball_to_goal_speed_reward_min: -0.6043353789065272
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-13-08
  done: false
  episode_len_mean: 61.57251908396947
  episode_media: {}
  episode_reward_max: 1.3774904435232536
  episode_reward_mean: 0.1006359334657125
  episode_reward_min: -1.2089025427913223
  episodes_this_iter: 131
  episodes_total: 17946
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8069061629772186
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,172,19763.2,1374624,0.100636,1.37749,-1.2089,61.5725


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5530464
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8951935214157002
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06522271508306891
    agent_0_total_ball_to_goal_speed_reward_min: -0.7708162455848417
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-15-00
  done: false
  episode_len_mean: 67.10924369747899
  episode_media: {}
  episode_reward_max: 1.589766328365112
  episode_reward_mean: 0.13401652559994673
  episode_reward_min: -1.297737859190816
  episodes_this_iter: 119
  episodes_total: 18065
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.8182702136039733
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,173,19874.9,1382616,0.134017,1.58977,-1.29774,67.1092


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5562432
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.78444337671104
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07698065877666459
    agent_0_total_ball_to_goal_speed_reward_min: -0.5315344301003776
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-16-52
  done: false
  episode_len_mean: 55.95035460992908
  episode_media: {}
  episode_reward_max: 1.262085927868037
  episode_reward_mean: 0.1678171716420447
  episode_reward_min: -0.9914696678822414
  episodes_this_iter: 141
  episodes_total: 18206
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.790758819103241
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,174,19987.1,1390608,0.167817,1.26209,-0.99147,55.9504


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5594400
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7671681597233952
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06486198454504179
    agent_0_total_ball_to_goal_speed_reward_min: -0.6528891397803178
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-18-44
  done: false
  episode_len_mean: 62.98412698412698
  episode_media: {}
  episode_reward_max: 1.2743416883349705
  episode_reward_mean: 0.15368975307571822
  episode_reward_min: -2.0424461330587302
  episodes_this_iter: 126
  episodes_total: 18332
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.80481755900383
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,175,20098.6,1398600,0.15369,1.27434,-2.04245,62.9841


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5626368
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.802097989051981
    agent_0_total_ball_to_goal_speed_reward_mean: 0.052899328116032596
    agent_0_total_ball_to_goal_speed_reward_min: -0.5825109603561813
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-20-36
  done: false
  episode_len_mean: 56.64335664335665
  episode_media: {}
  episode_reward_max: 1.276798606287025
  episode_reward_mean: 0.07514818153957963
  episode_reward_min: -1.1687027364044478
  episodes_this_iter: 143
  episodes_total: 18475
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.792798355102539
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,176,20210.3,1406592,0.0751482,1.2768,-1.1687,56.6434


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5658336
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6769325839532054
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08740855408279082
    agent_0_total_ball_to_goal_speed_reward_min: -0.5416101264333565
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-22-30
  done: false
  episode_len_mean: 54.72222222222222
  episode_media: {}
  episode_reward_max: 1.2450059774277
  episode_reward_mean: 0.17462448728419616
  episode_reward_min: -0.7704300175389982
  episodes_this_iter: 144
  episodes_total: 18619
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7819313855171204
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,177,20324.4,1414584,0.174624,1.24501,-0.77043,54.7222


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5690304
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.998993404743924
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05183723209063536
    agent_0_total_ball_to_goal_speed_reward_min: -0.5179988302341534
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-24-24
  done: false
  episode_len_mean: 50.9875
  episode_media: {}
  episode_reward_max: 1.7447900890021542
  episode_reward_mean: 0.10872469811177685
  episode_reward_min: -0.9766649589514227
  episodes_this_iter: 160
  episodes_total: 18779
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7766924085617065
          entropy_coeff: 0.0
          kl: 0.01

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,178,20438,1422576,0.108725,1.74479,-0.976665,50.9875


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5722272
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9578919456682237
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06271361594321646
    agent_0_total_ball_to_goal_speed_reward_min: -0.5597180815826523
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-26-16
  done: false
  episode_len_mean: 62.80916030534351
  episode_media: {}
  episode_reward_max: 1.381788048193195
  episode_reward_mean: 0.14202748640147284
  episode_reward_min: -1.1210059472011369
  episodes_this_iter: 131
  episodes_total: 18910
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7863701648712158
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,179,20549.9,1430568,0.142027,1.38179,-1.12101,62.8092


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5754240
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9041205273708695
    agent_0_total_ball_to_goal_speed_reward_mean: 0.010870257250657225
    agent_0_total_ball_to_goal_speed_reward_min: -0.5756150827004501
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-28-10
  done: false
  episode_len_mean: 57.28787878787879
  episode_media: {}
  episode_reward_max: 1.5511126881267971
  episode_reward_mean: 0.09300023645517097
  episode_reward_min: -0.9297696668385487
  episodes_this_iter: 132
  episodes_total: 19042
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7750306708812713
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,180,20664,1438560,0.0930002,1.55111,-0.92977,57.2879


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5786208
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.71213447082528
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05250207958148847
    agent_0_total_ball_to_goal_speed_reward_min: -0.5134116537785349
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-30-06
  done: false
  episode_len_mean: 62.32539682539682
  episode_media: {}
  episode_reward_max: 1.809930193815199
  episode_reward_mean: 0.16661837542677352
  episode_reward_min: -0.8419936979482783
  episodes_this_iter: 126
  episodes_total: 19168
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7834176707267761
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,181,20779.8,1446552,0.166618,1.80993,-0.841994,62.3254


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5818176
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1028013220000499
    agent_0_total_ball_to_goal_speed_reward_mean: 0.050503641662947095
    agent_0_total_ball_to_goal_speed_reward_min: -1.2485564040974926
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-32-00
  done: false
  episode_len_mean: 59.5514705882353
  episode_media: {}
  episode_reward_max: 1.5836905892478859
  episode_reward_mean: 0.1315355492100619
  episode_reward_min: -1.2742259506885727
  episodes_this_iter: 136
  episodes_total: 19304
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7733894670009613
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,182,20894,1454544,0.131536,1.58369,-1.27423,59.5515


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5850144
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7966045586853954
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03657673164876356
    agent_0_total_ball_to_goal_speed_reward_min: -0.5837111436582192
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-33-54
  done: false
  episode_len_mean: 62.01538461538462
  episode_media: {}
  episode_reward_max: 1.2535031838830815
  episode_reward_mean: 0.09069500322470489
  episode_reward_min: -0.9008683044542145
  episodes_this_iter: 130
  episodes_total: 19434
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7784501445293427
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,183,21008.2,1462536,0.090695,1.2535,-0.900868,62.0154


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5882112
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7610095177488527
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05045541321629701
    agent_0_total_ball_to_goal_speed_reward_min: -0.6187134034825432
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-35-48
  done: false
  episode_len_mean: 56.156462585034014
  episode_media: {}
  episode_reward_max: 1.1460564862334546
  episode_reward_mean: 0.12343462166836602
  episode_reward_min: -1.15492860925539
  episodes_this_iter: 147
  episodes_total: 19581
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7613628363609314
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,184,21122,1470528,0.123435,1.14606,-1.15493,56.1565


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5914080
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.37745088423514
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06553841307862725
    agent_0_total_ball_to_goal_speed_reward_min: -0.6131955710267321
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-37-42
  done: false
  episode_len_mean: 56.54814814814815
  episode_media: {}
  episode_reward_max: 1.7869721730143804
  episode_reward_mean: 0.19558471061856794
  episode_reward_min: -1.0279280644315185
  episodes_this_iter: 135
  episodes_total: 19716
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.767013388633728
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,185,21236.3,1478520,0.195585,1.78697,-1.02793,56.5481


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5946048
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.604241149793091
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06979996997622025
    agent_0_total_ball_to_goal_speed_reward_min: -0.4723669350454611
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-39-36
  done: false
  episode_len_mean: 66.7063492063492
  episode_media: {}
  episode_reward_max: 1.03369093764125
  episode_reward_mean: 0.07810983021213741
  episode_reward_min: -1.3243236727050256
  episodes_this_iter: 126
  episodes_total: 19842
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7732017936706543
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,186,21350.4,1486512,0.0781098,1.03369,-1.32432,66.7063


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 5978016
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.007643729391921
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10179877636155234
    agent_0_total_ball_to_goal_speed_reward_min: -0.5080993498035522
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-41-32
  done: false
  episode_len_mean: 60.83846153846154
  episode_media: {}
  episode_reward_max: 1.5590185456228076
  episode_reward_mean: 0.16820253638124116
  episode_reward_min: -0.927355331469573
  episodes_this_iter: 130
  episodes_total: 19972
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7575282912254333
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,187,21465.5,1494504,0.168203,1.55902,-0.927355,60.8385


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6009984
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8395253889071226
    agent_0_total_ball_to_goal_speed_reward_mean: 0.02533145142479455
    agent_0_total_ball_to_goal_speed_reward_min: -0.6301950323717154
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-43-26
  done: false
  episode_len_mean: 60.20610687022901
  episode_media: {}
  episode_reward_max: 1.206384224521177
  episode_reward_mean: 0.14341063922273065
  episode_reward_min: -0.9029824064452946
  episodes_this_iter: 131
  episodes_total: 20103
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7459264376163482
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,188,21580.1,1502496,0.143411,1.20638,-0.902982,60.2061


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6041952
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7742367466003336
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05988195329984899
    agent_0_total_ball_to_goal_speed_reward_min: -0.5681993568902545
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-45-27
  done: false
  episode_len_mean: 67.89166666666667
  episode_media: {}
  episode_reward_max: 1.2775719951019413
  episode_reward_mean: 0.0721530524761049
  episode_reward_min: -1.380075030619175
  episodes_this_iter: 120
  episodes_total: 20223
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7686263837814331
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,189,21700.4,1510488,0.0721531,1.27757,-1.38008,67.8917


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6073920
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7853172038021964
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04647330082431971
    agent_0_total_ball_to_goal_speed_reward_min: -0.5644067337556561
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-47-21
  done: false
  episode_len_mean: 58.474074074074075
  episode_media: {}
  episode_reward_max: 1.303595676925299
  episode_reward_mean: 0.1424178063821983
  episode_reward_min: -0.6540117259600344
  episodes_this_iter: 135
  episodes_total: 20358
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7599847991466522
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,190,21814.9,1518480,0.142418,1.3036,-0.654012,58.4741


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6105888
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.4466750896247103
    agent_0_total_ball_to_goal_speed_reward_mean: 0.033140417976984374
    agent_0_total_ball_to_goal_speed_reward_min: -0.6994074249017165
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-49-15
  done: false
  episode_len_mean: 55.208333333333336
  episode_media: {}
  episode_reward_max: 2.1372976465250524
  episode_reward_mean: 0.06804222733092792
  episode_reward_min: -1.088535058260252
  episodes_this_iter: 144
  episodes_total: 20502
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7510665717124939
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,191,21928.8,1526472,0.0680422,2.1373,-1.08854,55.2083


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6137856
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6232312337564438
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07306947007620129
    agent_0_total_ball_to_goal_speed_reward_min: -0.5080992286201946
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-51-11
  done: false
  episode_len_mean: 66.46280991735537
  episode_media: {}
  episode_reward_max: 1.328472698741907
  episode_reward_mean: 0.23612729932876472
  episode_reward_min: -0.8051881921581476
  episodes_this_iter: 121
  episodes_total: 20623
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.744781709909439
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,192,22044.4,1534464,0.236127,1.32847,-0.805188,66.4628


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6169824
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.619928608724948
    agent_0_total_ball_to_goal_speed_reward_mean: 0.01564764972113936
    agent_0_total_ball_to_goal_speed_reward_min: -0.7463623115574005
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-53-03
  done: false
  episode_len_mean: 63.496
  episode_media: {}
  episode_reward_max: 1.0900927700585759
  episode_reward_mean: 0.09782057252629241
  episode_reward_min: -1.5495472684744271
  episodes_this_iter: 125
  episodes_total: 20748
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7577931790351867
          entropy_coeff: 0.0
          kl: 0.018

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,193,22156.7,1542456,0.0978206,1.09009,-1.54955,63.496


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6201792
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7889383646129581
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05192182388185859
    agent_0_total_ball_to_goal_speed_reward_min: -0.5040533816825635
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-54-55
  done: false
  episode_len_mean: 55.798507462686565
  episode_media: {}
  episode_reward_max: 1.1116692064302764
  episode_reward_mean: 0.13163129865558273
  episode_reward_min: -1.7381384452459443
  episodes_this_iter: 134
  episodes_total: 20882
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7280210688114166
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,194,22268.6,1550448,0.131631,1.11167,-1.73814,55.7985


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6233760
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9594823725016441
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06828272151939695
    agent_0_total_ball_to_goal_speed_reward_min: -0.6742923619350515
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-56-47
  done: false
  episode_len_mean: 57.54
  episode_media: {}
  episode_reward_max: 1.9225701478063573
  episode_reward_mean: 0.23150977773438924
  episode_reward_min: -0.8932734008436221
  episodes_this_iter: 150
  episodes_total: 21032
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.731271889925003
          entropy_coeff: 0.0
          kl: 0.0177

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,195,22380.4,1558440,0.23151,1.92257,-0.893273,57.54


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6265728
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6746371340014998
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04750749835785692
    agent_0_total_ball_to_goal_speed_reward_min: -0.5081046381305483
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_20-58-39
  done: false
  episode_len_mean: 52.136986301369866
  episode_media: {}
  episode_reward_max: 1.3330858867902655
  episode_reward_mean: 0.1405483719875892
  episode_reward_min: -0.9579431439611552
  episodes_this_iter: 146
  episodes_total: 21178
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7317963330745697
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,196,22492.3,1566432,0.140548,1.33309,-0.957943,52.137


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6297696
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.953893207389607
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05592520813301104
    agent_0_total_ball_to_goal_speed_reward_min: -0.658833406719002
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-00-31
  done: false
  episode_len_mean: 69.36134453781513
  episode_media: {}
  episode_reward_max: 1.3197079135776981
  episode_reward_mean: 0.07228183292250806
  episode_reward_min: -1.2182097045544031
  episodes_this_iter: 119
  episodes_total: 21297
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7449686162471771
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,197,22604.8,1574424,0.0722818,1.31971,-1.21821,69.3613


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6329664
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.159648035998269
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04000146547144838
    agent_0_total_ball_to_goal_speed_reward_min: -0.5869907380343592
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-02-23
  done: false
  episode_len_mean: 57.23357664233577
  episode_media: {}
  episode_reward_max: 1.0918449085917055
  episode_reward_mean: 0.11806428665790528
  episode_reward_min: -1.232749852652459
  episodes_this_iter: 137
  episodes_total: 21434
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7321428277492523
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,198,22716.6,1582416,0.118064,1.09184,-1.23275,57.2336


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6361632
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9804539401546796
    agent_0_total_ball_to_goal_speed_reward_mean: 0.055795541688200105
    agent_0_total_ball_to_goal_speed_reward_min: -0.5138998423594258
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-04-16
  done: false
  episode_len_mean: 61.946969696969695
  episode_media: {}
  episode_reward_max: 1.611874771165052
  episode_reward_mean: 0.22871400910693493
  episode_reward_min: -0.809922613971259
  episodes_this_iter: 132
  episodes_total: 21566
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7350200238227844
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,199,22829.4,1590408,0.228714,1.61187,-0.809923,61.947


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6393600
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9777486663004101
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06452676389478618
    agent_0_total_ball_to_goal_speed_reward_min: -0.4813400680295066
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-06-08
  done: false
  episode_len_mean: 58.41044776119403
  episode_media: {}
  episode_reward_max: 1.6603411321003305
  episode_reward_mean: 0.19181159461978264
  episode_reward_min: -0.6952923181759316
  episodes_this_iter: 134
  episodes_total: 21700
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.72106418633461
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,200,22941.6,1598400,0.191812,1.66034,-0.695292,58.4104


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6425568
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.750746468655373
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09130493857025228
    agent_0_total_ball_to_goal_speed_reward_min: -0.5080992465467268
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-08-01
  done: false
  episode_len_mean: 55.70422535211268
  episode_media: {}
  episode_reward_max: 1.5931987409020745
  episode_reward_mean: 0.1483806544391259
  episode_reward_min: -0.9736436274159568
  episodes_this_iter: 142
  episodes_total: 21842
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7293548619747162
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,201,23053.8,1606392,0.148381,1.5932,-0.973644,55.7042


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6457536
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7831294677757393
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07112837327183115
    agent_0_total_ball_to_goal_speed_reward_min: -0.49509452216197836
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-09-52
  done: false
  episode_len_mean: 62.723076923076924
  episode_media: {}
  episode_reward_max: 1.5308827132228444
  episode_reward_mean: 0.2051485956864877
  episode_reward_min: -0.6732985746692952
  episodes_this_iter: 130
  episodes_total: 21972
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7292510061264038
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,202,23165.3,1614384,0.205149,1.53088,-0.673299,62.7231


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6489504
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.88992471989146
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06896144604843071
    agent_0_total_ball_to_goal_speed_reward_min: -0.5171920073309693
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-11-46
  done: false
  episode_len_mean: 60.04347826086956
  episode_media: {}
  episode_reward_max: 1.2910599182758564
  episode_reward_mean: 0.12233439851787066
  episode_reward_min: -1.1080360364380066
  episodes_this_iter: 138
  episodes_total: 22110
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7280112125873566
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,203,23278.6,1622376,0.122334,1.29106,-1.10804,60.0435


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6521472
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.897328489681304
    agent_0_total_ball_to_goal_speed_reward_mean: 0.040551951716648396
    agent_0_total_ball_to_goal_speed_reward_min: -0.6898015870582153
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-13-40
  done: false
  episode_len_mean: 60.193548387096776
  episode_media: {}
  episode_reward_max: 1.0214331634179796
  episode_reward_mean: 0.08114010866697552
  episode_reward_min: -0.8083381011151283
  episodes_this_iter: 124
  episodes_total: 22234
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7238280169963837
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,204,23392.6,1630368,0.0811401,1.02143,-0.808338,60.1935


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6553440
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8371615746907879
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05351521964510209
    agent_0_total_ball_to_goal_speed_reward_min: -0.6423216558007679
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-15-32
  done: false
  episode_len_mean: 65.25
  episode_media: {}
  episode_reward_max: 1.0109735124710004
  episode_reward_mean: 0.11112749392103921
  episode_reward_min: -1.4054166444770788
  episodes_this_iter: 124
  episodes_total: 22358
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.731073177576065
          entropy_coeff: 0.0
          kl: 0.0178

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,205,23504.6,1638360,0.111127,1.01097,-1.40542,65.25


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6585408
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9634333432315554
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04493454260185326
    agent_0_total_ball_to_goal_speed_reward_min: -0.6087045254835853
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-17-25
  done: false
  episode_len_mean: 56.41258741258741
  episode_media: {}
  episode_reward_max: 1.190439146999133
  episode_reward_mean: 0.12333754318505012
  episode_reward_min: -1.1276773098119768
  episodes_this_iter: 143
  episodes_total: 22501
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7105919535160065
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,206,23617.2,1646352,0.123338,1.19044,-1.12768,56.4126


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6617376
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8106323303544349
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0802512364837783
    agent_0_total_ball_to_goal_speed_reward_min: -0.4779090388780566
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-19-17
  done: false
  episode_len_mean: 59.97794117647059
  episode_media: {}
  episode_reward_max: 1.8599877991388896
  episode_reward_mean: 0.20651104098111253
  episode_reward_min: -0.9609845047943981
  episodes_this_iter: 136
  episodes_total: 22637
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7150586836338043
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,207,23729.4,1654344,0.206511,1.85999,-0.960985,59.9779


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6649344
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9988539670313838
    agent_0_total_ball_to_goal_speed_reward_mean: 0.054612316416947575
    agent_0_total_ball_to_goal_speed_reward_min: -0.6273929438335082
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-21-10
  done: false
  episode_len_mean: 58.48888888888889
  episode_media: {}
  episode_reward_max: 1.5265855215987492
  episode_reward_mean: 0.10894584498933058
  episode_reward_min: -1.564115782562073
  episodes_this_iter: 135
  episodes_total: 22772
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7121994516849518
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,208,23842,1662336,0.108946,1.52659,-1.56412,58.4889


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6681312
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9569578674202209
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10224859674132486
    agent_0_total_ball_to_goal_speed_reward_min: -0.5290815161066862
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-23-02
  done: false
  episode_len_mean: 58.786259541984734
  episode_media: {}
  episode_reward_max: 1.3641705245258142
  episode_reward_mean: 0.22605182991873904
  episode_reward_min: -1.2065838232360213
  episodes_this_iter: 131
  episodes_total: 22903
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.717892256975174
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,209,23954.2,1670328,0.226052,1.36417,-1.20658,58.7863


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6713280
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8177600537600292
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05879896826261081
    agent_0_total_ball_to_goal_speed_reward_min: -0.5700144639242787
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-24-54
  done: false
  episode_len_mean: 61.1
  episode_media: {}
  episode_reward_max: 2.2253081043738066
  episode_reward_mean: 0.08760817715315285
  episode_reward_min: -0.6911552679447546
  episodes_this_iter: 140
  episodes_total: 23043
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7095148017406464
          entropy_coeff: 0.0
          kl: 0.0183

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,210,24066.4,1678320,0.0876082,2.22531,-0.691155,61.1


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6745248
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.2444268202490278
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08367796668932474
    agent_0_total_ball_to_goal_speed_reward_min: -0.5771234347430797
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-26-47
  done: false
  episode_len_mean: 55.347517730496456
  episode_media: {}
  episode_reward_max: 1.7222177726836188
  episode_reward_mean: 0.1467549577104265
  episode_reward_min: -0.7886394076174994
  episodes_this_iter: 141
  episodes_total: 23184
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7075126523971558
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,211,24178.9,1686312,0.146755,1.72222,-0.788639,55.3475


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6777216
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8864379807783496
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08541583324144542
    agent_0_total_ball_to_goal_speed_reward_min: -0.6853299324986571
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-28-45
  done: false
  episode_len_mean: 60.51538461538462
  episode_media: {}
  episode_reward_max: 1.2407190921859916
  episode_reward_mean: 0.10531681869186367
  episode_reward_min: -1.097625447070454
  episodes_this_iter: 130
  episodes_total: 23314
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7126084234714508
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,212,24297,1694304,0.105317,1.24072,-1.09763,60.5154


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6809184
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9196706681713788
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06814795040051262
    agent_0_total_ball_to_goal_speed_reward_min: -0.8720525439491441
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-30-36
  done: false
  episode_len_mean: 57.31617647058823
  episode_media: {}
  episode_reward_max: 1.8609479078622342
  episode_reward_mean: 0.1688711192582194
  episode_reward_min: -1.5073218338568042
  episodes_this_iter: 136
  episodes_total: 23450
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.705511536359787
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,213,24408.5,1702296,0.168871,1.86095,-1.50732,57.3162


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6841152
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8116532218033053
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06117653317309846
    agent_0_total_ball_to_goal_speed_reward_min: -0.546506121887758
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-32-27
  done: false
  episode_len_mean: 59.02962962962963
  episode_media: {}
  episode_reward_max: 1.4690371562420337
  episode_reward_mean: 0.09766188110129656
  episode_reward_min: -1.2118118334977326
  episodes_this_iter: 135
  episodes_total: 23585
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7055134615898132
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,214,24519.4,1710288,0.0976619,1.46904,-1.21181,59.0296


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6873120
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8153750503104372
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10715902287588815
    agent_0_total_ball_to_goal_speed_reward_min: -0.777226827392601
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-34-18
  done: false
  episode_len_mean: 59.859154929577464
  episode_media: {}
  episode_reward_max: 1.5540408986510945
  episode_reward_mean: 0.16027469378717155
  episode_reward_min: -1.2660369764606938
  episodes_this_iter: 142
  episodes_total: 23727
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7009641768932342
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,215,24630.2,1718280,0.160275,1.55404,-1.26604,59.8592


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6905088
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8074688814332172
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08085246551824822
    agent_0_total_ball_to_goal_speed_reward_min: -0.5587654308846043
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-36-09
  done: false
  episode_len_mean: 52.1578947368421
  episode_media: {}
  episode_reward_max: 1.6455423308862267
  episode_reward_mean: 0.18424787331607093
  episode_reward_min: -0.9586775361759581
  episodes_this_iter: 152
  episodes_total: 23879
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.700858094215393
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,216,24740.8,1726272,0.184248,1.64554,-0.958678,52.1579


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6937056
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7834902045301706
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06463363219298898
    agent_0_total_ball_to_goal_speed_reward_min: -0.5144293176872412
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-37-59
  done: false
  episode_len_mean: 58.05343511450382
  episode_media: {}
  episode_reward_max: 1.2546250546840025
  episode_reward_mean: 0.11095756568022705
  episode_reward_min: -0.9114199508161245
  episodes_this_iter: 131
  episodes_total: 24010
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7044024934768677
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,217,24850.6,1734264,0.110958,1.25463,-0.91142,58.0534


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 6969024
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8085831654271352
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05197897896455986
    agent_0_total_ball_to_goal_speed_reward_min: -0.7906724580091812
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-39-49
  done: false
  episode_len_mean: 69.23333333333333
  episode_media: {}
  episode_reward_max: 1.6084582142652866
  episode_reward_mean: 0.06340145566913687
  episode_reward_min: -1.1088365745398152
  episodes_this_iter: 120
  episodes_total: 24130
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7046656746864319
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,218,24961.1,1742256,0.0634015,1.60846,-1.10884,69.2333


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7000992
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6582507072998811
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08115297689471317
    agent_0_total_ball_to_goal_speed_reward_min: -0.6248413977727573
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-41-40
  done: false
  episode_len_mean: 62.853658536585364
  episode_media: {}
  episode_reward_max: 1.542111347173448
  episode_reward_mean: 0.1803675419819834
  episode_reward_min: -0.9387173863104756
  episodes_this_iter: 123
  episodes_total: 24253
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6951756830215454
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,219,25071.9,1750248,0.180368,1.54211,-0.938717,62.8537


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7032960
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7982308013308559
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0558579650821901
    agent_0_total_ball_to_goal_speed_reward_min: -0.5616629417529944
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-43-32
  done: false
  episode_len_mean: 68.29411764705883
  episode_media: {}
  episode_reward_max: 2.1900629287699633
  episode_reward_mean: 0.1069614728739123
  episode_reward_min: -1.5228397666317384
  episodes_this_iter: 119
  episodes_total: 24372
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.7001201612949371
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,220,25183.9,1758240,0.106961,2.19006,-1.52284,68.2941


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7064928
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8147384192870939
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08535416038180932
    agent_0_total_ball_to_goal_speed_reward_min: -0.4999106585823296
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-45-23
  done: false
  episode_len_mean: 59.6764705882353
  episode_media: {}
  episode_reward_max: 1.1458089751098717
  episode_reward_mean: 0.07427492761603864
  episode_reward_min: -1.5071731268761495
  episodes_this_iter: 136
  episodes_total: 24508
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6880358655452729
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,221,25294.8,1766232,0.0742749,1.14581,-1.50717,59.6765


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7096896
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8075151313217074
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07393180952423607
    agent_0_total_ball_to_goal_speed_reward_min: -0.6184956689516838
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-47-15
  done: false
  episode_len_mean: 63.46341463414634
  episode_media: {}
  episode_reward_max: 1.1363660356931422
  episode_reward_mean: 0.1404625973304141
  episode_reward_min: -0.7911226294368221
  episodes_this_iter: 123
  episodes_total: 24631
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.692458987236023
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,222,25406.3,1774224,0.140463,1.13637,-0.791123,63.4634


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7128864
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7550923293213093
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06471701397864221
    agent_0_total_ball_to_goal_speed_reward_min: -0.6250973624977455
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-49-06
  done: false
  episode_len_mean: 64.488
  episode_media: {}
  episode_reward_max: 1.4796464686662039
  episode_reward_mean: 0.12270015168563875
  episode_reward_min: -1.1559589409273474
  episodes_this_iter: 125
  episodes_total: 24756
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6976969771385193
          entropy_coeff: 0.0
          kl: 0.01

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,223,25516.9,1782216,0.1227,1.47965,-1.15596,64.488


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7160832
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9580889224039272
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07019888080771297
    agent_0_total_ball_to_goal_speed_reward_min: -0.5077522973390485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-50-58
  done: false
  episode_len_mean: 58.992592592592594
  episode_media: {}
  episode_reward_max: 1.6227265669786508
  episode_reward_mean: 0.1057243670237602
  episode_reward_min: -1.0711601395044545
  episodes_this_iter: 135
  episodes_total: 24891
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6854904997348785
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,224,25629,1790208,0.105724,1.62273,-1.07116,58.9926


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7192800
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9660661654100143
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07740807610839841
    agent_0_total_ball_to_goal_speed_reward_min: -1.116694560243697
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-52-49
  done: false
  episode_len_mean: 63.951219512195124
  episode_media: {}
  episode_reward_max: 1.288173959946672
  episode_reward_mean: 0.15141127492808998
  episode_reward_min: -1.3133698447512043
  episodes_this_iter: 123
  episodes_total: 25014
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6898350551128387
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,225,25739.8,1798200,0.151411,1.28817,-1.31337,63.9512


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7224768
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7884858842323236
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07380560289887021
    agent_0_total_ball_to_goal_speed_reward_min: -0.5871653278726663
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-54-39
  done: false
  episode_len_mean: 68.59166666666667
  episode_media: {}
  episode_reward_max: 1.1367895562017856
  episode_reward_mean: 0.14786241723571186
  episode_reward_min: -1.006399595309615
  episodes_this_iter: 120
  episodes_total: 25134
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6862305314540863
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,226,25850.6,1806192,0.147862,1.13679,-1.0064,68.5917


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7256736
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6747834310594484
    agent_0_total_ball_to_goal_speed_reward_mean: 0.030085630159588008
    agent_0_total_ball_to_goal_speed_reward_min: -0.7792198217808122
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-56-30
  done: false
  episode_len_mean: 70.26548672566372
  episode_media: {}
  episode_reward_max: 1.1564005092798055
  episode_reward_mean: -0.01824557365603832
  episode_reward_min: -2.2368831610848305
  episodes_this_iter: 113
  episodes_total: 25247
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6885086243152618
          entropy_coeff: 0.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,227,25961,1814184,-0.0182456,1.1564,-2.23688,70.2655


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7288704
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9582170319554817
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10337328914028576
    agent_0_total_ball_to_goal_speed_reward_min: -0.49083816444110795
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_21-58-21
  done: false
  episode_len_mean: 61.784615384615385
  episode_media: {}
  episode_reward_max: 1.3615999098039528
  episode_reward_mean: 0.1976844064455411
  episode_reward_min: -1.3994982315519735
  episodes_this_iter: 130
  episodes_total: 25377
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6792340457439423
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,228,26071.9,1822176,0.197684,1.3616,-1.3995,61.7846


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7320672
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7272176287814502
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06846243066549457
    agent_0_total_ball_to_goal_speed_reward_min: -0.579720353000593
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-00-12
  done: false
  episode_len_mean: 59.06153846153846
  episode_media: {}
  episode_reward_max: 1.4097549634485014
  episode_reward_mean: 0.11064569372021947
  episode_reward_min: -0.9629107363097158
  episodes_this_iter: 130
  episodes_total: 25507
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6842001292705536
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,229,26183.2,1830168,0.110646,1.40975,-0.962911,59.0615


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7352640
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1864398398006923
    agent_0_total_ball_to_goal_speed_reward_mean: 0.058402606225518405
    agent_0_total_ball_to_goal_speed_reward_min: -0.8150131770947856
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-02-04
  done: false
  episode_len_mean: 62.8421052631579
  episode_media: {}
  episode_reward_max: 1.531396514216072
  episode_reward_mean: 0.04267189079860399
  episode_reward_min: -1.1575026369015577
  episodes_this_iter: 133
  episodes_total: 25640
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6811867773532867
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,230,26294.7,1838160,0.0426719,1.5314,-1.1575,62.8421


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7384608
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9184277990160296
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09481778835984794
    agent_0_total_ball_to_goal_speed_reward_min: -0.6797176158267736
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-03-55
  done: false
  episode_len_mean: 59.92481203007519
  episode_media: {}
  episode_reward_max: 1.403301949325401
  episode_reward_mean: 0.23515577703482654
  episode_reward_min: -1.2184142125676276
  episodes_this_iter: 133
  episodes_total: 25773
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6745049769878387
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,231,26406.2,1846152,0.235156,1.4033,-1.21841,59.9248


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7416576
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8069945262997134
    agent_0_total_ball_to_goal_speed_reward_mean: 0.059165111779593445
    agent_0_total_ball_to_goal_speed_reward_min: -0.4786269025572367
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-05-47
  done: false
  episode_len_mean: 63.877049180327866
  episode_media: {}
  episode_reward_max: 1.6959444817742866
  episode_reward_mean: 0.19765947460346492
  episode_reward_min: -0.701032106826625
  episodes_this_iter: 122
  episodes_total: 25895
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6797853264808654
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,232,26518.1,1854144,0.197659,1.69594,-0.701032,63.877


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7448544
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.758488791215665
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07428779687897871
    agent_0_total_ball_to_goal_speed_reward_min: -0.5376094168368639
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-07-38
  done: false
  episode_len_mean: 74.3177570093458
  episode_media: {}
  episode_reward_max: 0.9630394169868045
  episode_reward_mean: 0.053949282915294834
  episode_reward_min: -1.8809354288134343
  episodes_this_iter: 107
  episodes_total: 26002
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6763628079891205
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,233,26628.9,1862136,0.0539493,0.963039,-1.88094,74.3178


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7480512
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9065821273246266
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09403095483584024
    agent_0_total_ball_to_goal_speed_reward_min: -1.0911136525318927
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-09-36
  done: false
  episode_len_mean: 64.51587301587301
  episode_media: {}
  episode_reward_max: 1.2386593136517057
  episode_reward_mean: 0.1025895019484336
  episode_reward_min: -1.2802411739623318
  episodes_this_iter: 126
  episodes_total: 26128
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6653821184635162
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,234,26746.2,1870128,0.10259,1.23866,-1.28024,64.5159


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7512480
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6577681287015015
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04708956529515675
    agent_0_total_ball_to_goal_speed_reward_min: -0.49618763381902375
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-11-31
  done: false
  episode_len_mean: 58.65413533834587
  episode_media: {}
  episode_reward_max: 1.437014576704553
  episode_reward_mean: 0.0983671333386803
  episode_reward_min: -1.5353785549613328
  episodes_this_iter: 133
  episodes_total: 26261
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6682367551326752
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,235,26861.8,1878120,0.0983671,1.43701,-1.53538,58.6541


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7544448
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.937303726436473
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0730198694692965
    agent_0_total_ball_to_goal_speed_reward_min: -0.7940432637463284
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-13-30
  done: false
  episode_len_mean: 63.07462686567164
  episode_media: {}
  episode_reward_max: 1.2038022552408492
  episode_reward_mean: 0.07198172827885167
  episode_reward_min: -1.7412017304230463
  episodes_this_iter: 134
  episodes_total: 26395
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6666726455688476
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,236,26980.8,1886112,0.0719817,1.2038,-1.7412,63.0746


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7576416
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5761034223578197
    agent_0_total_ball_to_goal_speed_reward_mean: 0.02913697972579112
    agent_0_total_ball_to_goal_speed_reward_min: -0.6528825410866874
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-15-27
  done: false
  episode_len_mean: 61.06153846153846
  episode_media: {}
  episode_reward_max: 1.8685065016710314
  episode_reward_mean: 0.12925568039029478
  episode_reward_min: -0.9113881295041393
  episodes_this_iter: 130
  episodes_total: 26525
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6619699873924255
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,237,27097.6,1894104,0.129256,1.86851,-0.911388,61.0615


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7608384
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.776443221983426
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06901983110091084
    agent_0_total_ball_to_goal_speed_reward_min: -0.7482316001008463
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-17-20
  done: false
  episode_len_mean: 58.52554744525548
  episode_media: {}
  episode_reward_max: 1.252004325228726
  episode_reward_mean: 0.1480480882442724
  episode_reward_min: -1.0747800560965177
  episodes_this_iter: 137
  episodes_total: 26662
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6590824880599976
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,238,27210.4,1902096,0.148048,1.252,-1.07478,58.5255


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7640352
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.800294896137697
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07096118882153969
    agent_0_total_ball_to_goal_speed_reward_min: -0.6464591313394432
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-19-15
  done: false
  episode_len_mean: 58.72992700729927
  episode_media: {}
  episode_reward_max: 1.2514413738732073
  episode_reward_mean: 0.1890030646216073
  episode_reward_min: -1.440719560768184
  episodes_this_iter: 137
  episodes_total: 26799
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.65768235039711
          entropy_coeff: 0.0
          kl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,239,27325,1910088,0.189003,1.25144,-1.44072,58.7299


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7672320
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7193850709822371
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07259255078503193
    agent_0_total_ball_to_goal_speed_reward_min: -0.6198392983923872
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-21-07
  done: false
  episode_len_mean: 62.552
  episode_media: {}
  episode_reward_max: 1.5159232700580834
  episode_reward_mean: 0.20269629335848116
  episode_reward_min: -0.8148196282474105
  episodes_this_iter: 125
  episodes_total: 26924
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6582971694469452
          entropy_coeff: 0.0
          kl: 0.01

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,240,27437.2,1918080,0.202696,1.51592,-0.81482,62.552


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7704288
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1449105058713933
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05835602961781185
    agent_0_total_ball_to_goal_speed_reward_min: -0.5492304538865619
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-23-00
  done: false
  episode_len_mean: 64.24
  episode_media: {}
  episode_reward_max: 1.3852174559752144
  episode_reward_mean: 0.12192533947462317
  episode_reward_min: -0.7474669482630681
  episodes_this_iter: 125
  episodes_total: 27049
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6625881123542786
          entropy_coeff: 0.0
          kl: 0.018

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,241,27549.9,1926072,0.121925,1.38522,-0.747467,64.24


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7736256
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5939351468219266
    agent_0_total_ball_to_goal_speed_reward_mean: 0.060049901442348196
    agent_0_total_ball_to_goal_speed_reward_min: -0.5242134749280513
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-24-52
  done: false
  episode_len_mean: 59.315384615384616
  episode_media: {}
  episode_reward_max: 1.5265373454820672
  episode_reward_mean: 0.16835338899815672
  episode_reward_min: -1.1461545323536464
  episodes_this_iter: 130
  episodes_total: 27179
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.648540241241455
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,242,27662.1,1934064,0.168353,1.52654,-1.14615,59.3154


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7768224
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8569586226746093
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08099218235019225
    agent_0_total_ball_to_goal_speed_reward_min: -0.5180253036619643
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-26-44
  done: false
  episode_len_mean: 63.671875
  episode_media: {}
  episode_reward_max: 1.6342702825983144
  episode_reward_mean: 0.1497689263385349
  episode_reward_min: -1.394262197791523
  episodes_this_iter: 128
  episodes_total: 27307
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.654421311378479
          entropy_coeff: 0.0
          kl: 0.01

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,243,27774.4,1942056,0.149769,1.63427,-1.39426,63.6719


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7800192
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8441664052740089
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06920204244955667
    agent_0_total_ball_to_goal_speed_reward_min: -0.7138246022695522
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-28-36
  done: false
  episode_len_mean: 62.207692307692305
  episode_media: {}
  episode_reward_max: 1.380575585325306
  episode_reward_mean: 0.19141491797536375
  episode_reward_min: -0.9517957790304468
  episodes_this_iter: 130
  episodes_total: 27437
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.657899115562439
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,244,27886.4,1950048,0.191415,1.38058,-0.951796,62.2077


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7832160
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6638139579775688
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07003563680537027
    agent_0_total_ball_to_goal_speed_reward_min: -0.4478663614602481
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-30-29
  done: false
  episode_len_mean: 56.716312056737586
  episode_media: {}
  episode_reward_max: 1.4127578826974414
  episode_reward_mean: 0.15718200020133016
  episode_reward_min: -0.8167984874534836
  episodes_this_iter: 141
  episodes_total: 27578
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6490068290233612
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,245,27998.6,1958040,0.157182,1.41276,-0.816798,56.7163


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7864128
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8356873925904312
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0733049280155631
    agent_0_total_ball_to_goal_speed_reward_min: -0.6724157655511258
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-32-21
  done: false
  episode_len_mean: 62.3515625
  episode_media: {}
  episode_reward_max: 1.6683545392402817
  episode_reward_mean: 0.20805327040704027
  episode_reward_min: -0.7410118369865673
  episodes_this_iter: 128
  episodes_total: 27706
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6548163697719575
          entropy_coeff: 0.0
          kl: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,246,28111.2,1966032,0.208053,1.66835,-0.741012,62.3516


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7896096
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.661513962233254
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05241085639340283
    agent_0_total_ball_to_goal_speed_reward_min: -0.5332448576687334
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-34-19
  done: false
  episode_len_mean: 58.722627737226276
  episode_media: {}
  episode_reward_max: 1.7186821544708135
  episode_reward_mean: 0.1277896551327605
  episode_reward_min: -1.3990502860775815
  episodes_this_iter: 137
  episodes_total: 27843
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6544964141845703
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,247,28228.4,1974024,0.12779,1.71868,-1.39905,58.7226


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7928064
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1960649241716803
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10438941616423425
    agent_0_total_ball_to_goal_speed_reward_min: -0.5185254956427805
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-36-21
  done: false
  episode_len_mean: 66.51666666666667
  episode_media: {}
  episode_reward_max: 1.5050506715168632
  episode_reward_mean: 0.19834166206375486
  episode_reward_min: -0.8028501028674402
  episodes_this_iter: 120
  episodes_total: 27963
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.658333960056305
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,248,28350.4,1982016,0.198342,1.50505,-0.80285,66.5167


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7960032
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8972322375948149
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07555217324822112
    agent_0_total_ball_to_goal_speed_reward_min: -0.5688028040028049
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-38-15
  done: false
  episode_len_mean: 57.61764705882353
  episode_media: {}
  episode_reward_max: 1.8520507336292966
  episode_reward_mean: 0.15322398729820796
  episode_reward_min: -0.9759647690631914
  episodes_this_iter: 136
  episodes_total: 28099
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.653917720079422
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,249,28464.9,1990008,0.153224,1.85205,-0.975965,57.6176


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 7992000
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0697354808142683
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08932080637314034
    agent_0_total_ball_to_goal_speed_reward_min: -0.5148871139777683
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-40-10
  done: false
  episode_len_mean: 63.09448818897638
  episode_media: {}
  episode_reward_max: 1.8509078937355832
  episode_reward_mean: 0.24466088049800574
  episode_reward_min: -0.6642885699877081
  episodes_this_iter: 127
  episodes_total: 28226
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6613833038806916
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,250,28579.3,1998000,0.244661,1.85091,-0.664289,63.0945


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8023968
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.748665258321999
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07510336539160263
    agent_0_total_ball_to_goal_speed_reward_min: -0.5226072063739465
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-42-05
  done: false
  episode_len_mean: 64.88983050847457
  episode_media: {}
  episode_reward_max: 1.8588632845368496
  episode_reward_mean: 0.22405739409629463
  episode_reward_min: -0.943235312043865
  episodes_this_iter: 118
  episodes_total: 28344
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6568138229846955
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,251,28693.9,2005992,0.224057,1.85886,-0.943235,64.8898


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8055936
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.017933683288444
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05904781275092533
    agent_0_total_ball_to_goal_speed_reward_min: -0.5704172996070912
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-44-00
  done: false
  episode_len_mean: 70.47413793103448
  episode_media: {}
  episode_reward_max: 1.4281537832257651
  episode_reward_mean: 0.2020923027656204
  episode_reward_min: -0.8095436183183289
  episodes_this_iter: 116
  episodes_total: 28460
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6514719910621644
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,252,28808.9,2013984,0.202092,1.42815,-0.809544,70.4741


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8087904
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9643630077139946
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05801816550178241
    agent_0_total_ball_to_goal_speed_reward_min: -0.5795457606413672
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-45-54
  done: false
  episode_len_mean: 59.35
  episode_media: {}
  episode_reward_max: 1.887548488518675
  episode_reward_mean: 0.15268085707654294
  episode_reward_min: -1.1219097429809128
  episodes_this_iter: 140
  episodes_total: 28600
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6563312051296234
          entropy_coeff: 0.0
          kl: 0.0195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,253,28923.5,2021976,0.152681,1.88755,-1.12191,59.35


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8119872
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9394548121876118
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06797879528578275
    agent_0_total_ball_to_goal_speed_reward_min: -0.49720029596755505
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-47-50
  done: false
  episode_len_mean: 58.82442748091603
  episode_media: {}
  episode_reward_max: 1.519289083796659
  episode_reward_mean: 0.17112561632112896
  episode_reward_min: -1.26275816251959
  episodes_this_iter: 131
  episodes_total: 28731
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6507649183273315
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,254,29038.8,2029968,0.171126,1.51929,-1.26276,58.8244


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8151840
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7032519356038088
    agent_0_total_ball_to_goal_speed_reward_mean: 0.02187124317330797
    agent_0_total_ball_to_goal_speed_reward_min: -0.6324414416685039
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-49-44
  done: false
  episode_len_mean: 60.79259259259259
  episode_media: {}
  episode_reward_max: 1.695035053338433
  episode_reward_mean: 0.15549023600175507
  episode_reward_min: -0.9055600159076258
  episodes_this_iter: 135
  episodes_total: 28866
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6519154732227326
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,255,29153.2,2037960,0.15549,1.69504,-0.90556,60.7926


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8183808
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.944477679984608
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04272852951040872
    agent_0_total_ball_to_goal_speed_reward_min: -0.7450951435270184
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-51-40
  done: false
  episode_len_mean: 57.75
  episode_media: {}
  episode_reward_max: 1.7287068616661019
  episode_reward_mean: 0.18374916708099404
  episode_reward_min: -1.3469352453060852
  episodes_this_iter: 136
  episodes_total: 29002
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6553272421360016
          entropy_coeff: 0.0
          kl: 0.0191

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,256,29268.7,2045952,0.183749,1.72871,-1.34694,57.75


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8215776
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7081108393333247
    agent_0_total_ball_to_goal_speed_reward_mean: 0.044301383362770154
    agent_0_total_ball_to_goal_speed_reward_min: -0.689778036002478
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-53-35
  done: false
  episode_len_mean: 57.81021897810219
  episode_media: {}
  episode_reward_max: 1.4479186611684383
  episode_reward_mean: 0.12607779973764296
  episode_reward_min: -0.9928921451502162
  episodes_this_iter: 137
  episodes_total: 29139
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6507884345054626
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,257,29383.8,2053944,0.126078,1.44792,-0.992892,57.8102


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8247744
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8982292238337444
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06174091126181444
    agent_0_total_ball_to_goal_speed_reward_min: -0.8110635827279115
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-55-29
  done: false
  episode_len_mean: 70.2695652173913
  episode_media: {}
  episode_reward_max: 1.7944915948925604
  episode_reward_mean: 0.11972870779822395
  episode_reward_min: -1.3134281240605024
  episodes_this_iter: 115
  episodes_total: 29254
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6494387288093567
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,258,29498.1,2061936,0.119729,1.79449,-1.31343,70.2696


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8279712
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6564728376705812
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09416044522259216
    agent_0_total_ball_to_goal_speed_reward_min: -0.5336977985107081
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-57-25
  done: false
  episode_len_mean: 62.267716535433074
  episode_media: {}
  episode_reward_max: 1.5315454414441914
  episode_reward_mean: 0.18621456441171838
  episode_reward_min: -0.9662068407891535
  episodes_this_iter: 127
  episodes_total: 29381
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6478175508975983
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,259,29613.4,2069928,0.186215,1.53155,-0.966207,62.2677


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8311680
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.817119288098433
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07608261292700616
    agent_0_total_ball_to_goal_speed_reward_min: -0.47901925136187873
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_22-59-19
  done: false
  episode_len_mean: 67.18032786885246
  episode_media: {}
  episode_reward_max: 1.426377688017154
  episode_reward_mean: 0.1919718841963222
  episode_reward_min: -1.1891638373096232
  episodes_this_iter: 122
  episodes_total: 29503
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.643993064403534
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,260,29727.9,2077920,0.191972,1.42638,-1.18916,67.1803


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8343648
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8761728752788344
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10294337366178366
    agent_0_total_ball_to_goal_speed_reward_min: -0.5375558747039767
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-01-14
  done: false
  episode_len_mean: 61.125984251968504
  episode_media: {}
  episode_reward_max: 1.8076869866313634
  episode_reward_mean: 0.20619827048746256
  episode_reward_min: -0.7849213811616802
  episodes_this_iter: 127
  episodes_total: 29630
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6453724820613861
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,261,29842.7,2085912,0.206198,1.80769,-0.784921,61.126


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8375616
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6954541312156595
    agent_0_total_ball_to_goal_speed_reward_mean: 0.13174820617633612
    agent_0_total_ball_to_goal_speed_reward_min: -0.9397455692018303
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-03-09
  done: false
  episode_len_mean: 62.92248062015504
  episode_media: {}
  episode_reward_max: 1.9383064841312145
  episode_reward_mean: 0.21435692661308145
  episode_reward_min: -0.682159861099795
  episodes_this_iter: 129
  episodes_total: 29759
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6387804565429688
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,262,29957.8,2093904,0.214357,1.93831,-0.68216,62.9225


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8407584
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.23606422556653
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05499539031721782
    agent_0_total_ball_to_goal_speed_reward_min: -0.6646212096834029
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-05-08
  done: false
  episode_len_mean: 67.05982905982906
  episode_media: {}
  episode_reward_max: 1.3841809870745991
  episode_reward_mean: 0.14248453485936352
  episode_reward_min: -1.0931459337215417
  episodes_this_iter: 117
  episodes_total: 29876
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6438814001083374
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,263,30076.5,2101896,0.142485,1.38418,-1.09315,67.0598


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8439552
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7757893377484415
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05881876387245156
    agent_0_total_ball_to_goal_speed_reward_min: -0.524213935460663
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-07-02
  done: false
  episode_len_mean: 67.85123966942149
  episode_media: {}
  episode_reward_max: 1.4062851565949637
  episode_reward_mean: 0.2198044771830643
  episode_reward_min: -1.04538210349541
  episodes_this_iter: 121
  episodes_total: 29997
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6449731593132019
          entropy_coeff: 0.0
          k

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,264,30190.2,2109888,0.219804,1.40629,-1.04538,67.8512


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8471520
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7311335625480034
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10296794499074927
    agent_0_total_ball_to_goal_speed_reward_min: -0.5244350528445436
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-08-55
  done: false
  episode_len_mean: 67.84210526315789
  episode_media: {}
  episode_reward_max: 1.6463508650383472
  episode_reward_mean: 0.20682604942758379
  episode_reward_min: -2.25301061157007
  episodes_this_iter: 114
  episodes_total: 30111
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6431311540603638
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,265,30303.4,2117880,0.206826,1.64635,-2.25301,67.8421


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8503488
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9589432207871624
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09109435214012548
    agent_0_total_ball_to_goal_speed_reward_min: -0.5515983087793563
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-10-48
  done: false
  episode_len_mean: 66.78688524590164
  episode_media: {}
  episode_reward_max: 1.3928912053402542
  episode_reward_mean: 0.18765369373868213
  episode_reward_min: -1.036734463638199
  episodes_this_iter: 122
  episodes_total: 30233
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6392914958000183
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,266,30416.8,2125872,0.187654,1.39289,-1.03673,66.7869


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8535456
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7746156496723752
    agent_0_total_ball_to_goal_speed_reward_mean: 0.026081152926802565
    agent_0_total_ball_to_goal_speed_reward_min: -0.5684045817398155
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-12-44
  done: false
  episode_len_mean: 67.42735042735043
  episode_media: {}
  episode_reward_max: 1.1401464268725379
  episode_reward_mean: 0.08610638091198422
  episode_reward_min: -1.9054034684869383
  episodes_this_iter: 117
  episodes_total: 30350
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6447191865444183
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,267,30532,2133864,0.0861064,1.14015,-1.9054,67.4274


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8567424
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9645475124863266
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0711775440919229
    agent_0_total_ball_to_goal_speed_reward_min: -0.52040718693576
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-14-43
  done: false
  episode_len_mean: 62.152671755725194
  episode_media: {}
  episode_reward_max: 1.4475664362208154
  episode_reward_mean: 0.21244962182126437
  episode_reward_min: -0.9378657416464202
  episodes_this_iter: 131
  episodes_total: 30481
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6407130150794983
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,268,30651.2,2141856,0.21245,1.44757,-0.937866,62.1527


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8599392
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0463526476911724
    agent_0_total_ball_to_goal_speed_reward_mean: 0.1262573099510323
    agent_0_total_ball_to_goal_speed_reward_min: -0.5251248601295924
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-16-38
  done: false
  episode_len_mean: 61.40769230769231
  episode_media: {}
  episode_reward_max: 2.0521935633034203
  episode_reward_mean: 0.22937916971612746
  episode_reward_min: -1.2458129931400608
  episodes_this_iter: 130
  episodes_total: 30611
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6389470145702362
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,269,30765.8,2149848,0.229379,2.05219,-1.24581,61.4077


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8631360
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7999360489019982
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0680916780793821
    agent_0_total_ball_to_goal_speed_reward_min: -0.6570286218986097
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-18-34
  done: false
  episode_len_mean: 60.54135338345865
  episode_media: {}
  episode_reward_max: 1.3493466392701654
  episode_reward_mean: 0.10981847257880376
  episode_reward_min: -1.2286570809313548
  episodes_this_iter: 133
  episodes_total: 30744
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6427342109680175
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,270,30882.5,2157840,0.109818,1.34935,-1.22866,60.5414


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8663328
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8729942763134965
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09459587920808332
    agent_0_total_ball_to_goal_speed_reward_min: -0.5607566074241067
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-20-30
  done: false
  episode_len_mean: 68.44347826086957
  episode_media: {}
  episode_reward_max: 1.6568508954795909
  episode_reward_mean: 0.24542296099549496
  episode_reward_min: -0.831085735215721
  episodes_this_iter: 115
  episodes_total: 30859
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6443667759895325
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,271,30997.8,2165832,0.245423,1.65685,-0.831086,68.4435


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8695296
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9788035178347299
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10279352599762867
    agent_0_total_ball_to_goal_speed_reward_min: -0.6792436415959745
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-22-26
  done: false
  episode_len_mean: 69.10344827586206
  episode_media: {}
  episode_reward_max: 1.4456389182805074
  episode_reward_mean: 0.12472315727096545
  episode_reward_min: -0.9287740378687371
  episodes_this_iter: 116
  episodes_total: 30975
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6392884917259216
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,272,31113.9,2173824,0.124723,1.44564,-0.928774,69.1034


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8727264
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0516671768772556
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10007349452378765
    agent_0_total_ball_to_goal_speed_reward_min: -0.6012429430569115
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-24-22
  done: false
  episode_len_mean: 66.4274193548387
  episode_media: {}
  episode_reward_max: 1.5372616232550107
  episode_reward_mean: 0.18070926029620776
  episode_reward_min: -1.317192242594984
  episodes_this_iter: 124
  episodes_total: 31099
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6410253548622131
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,273,31229.7,2181816,0.180709,1.53726,-1.31719,66.4274


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8759232
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7701148090626992
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06917696661421496
    agent_0_total_ball_to_goal_speed_reward_min: -0.5442979823940227
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-26-17
  done: false
  episode_len_mean: 60.8062015503876
  episode_media: {}
  episode_reward_max: 1.428652453032973
  episode_reward_mean: 0.1622917115424876
  episode_reward_min: -0.8833493081292803
  episodes_this_iter: 129
  episodes_total: 31228
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6405677227973938
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,274,31344.7,2189808,0.162292,1.42865,-0.883349,60.8062


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8791200
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.720848907143071
    agent_0_total_ball_to_goal_speed_reward_mean: 0.054315966892881526
    agent_0_total_ball_to_goal_speed_reward_min: -0.7330288737927125
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-28-11
  done: false
  episode_len_mean: 65.31666666666666
  episode_media: {}
  episode_reward_max: 1.40971765764308
  episode_reward_mean: 0.2003524347174654
  episode_reward_min: -1.4758575693121152
  episodes_this_iter: 120
  episodes_total: 31348
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6475865550041199
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,275,31459.3,2197800,0.200352,1.40972,-1.47586,65.3167


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8823168
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9448522635958501
    agent_0_total_ball_to_goal_speed_reward_mean: 0.056988842995805845
    agent_0_total_ball_to_goal_speed_reward_min: -1.2286186127676044
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-30-07
  done: false
  episode_len_mean: 67.15044247787611
  episode_media: {}
  episode_reward_max: 1.0691735321777118
  episode_reward_mean: 0.19914975117811193
  episode_reward_min: -1.0845970819571282
  episodes_this_iter: 113
  episodes_total: 31461
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6413476777076721
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,276,31574.3,2205792,0.19915,1.06917,-1.0846,67.1504


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8855136
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8803136090114152
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06841488544177225
    agent_0_total_ball_to_goal_speed_reward_min: -0.4917852654433208
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-32-04
  done: false
  episode_len_mean: 71.77310924369748
  episode_media: {}
  episode_reward_max: 1.796434046618561
  episode_reward_mean: 0.13368497281617994
  episode_reward_min: -0.9574408482318377
  episodes_this_iter: 119
  episodes_total: 31580
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6397659122943878
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,277,31691.3,2213784,0.133685,1.79643,-0.957441,71.7731


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8887104
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0618217947682012
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05283544613455858
    agent_0_total_ball_to_goal_speed_reward_min: -0.6408992330396717
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-34-00
  done: false
  episode_len_mean: 64.77049180327869
  episode_media: {}
  episode_reward_max: 1.3798723730347493
  episode_reward_mean: 0.17121455266298136
  episode_reward_min: -1.4102379865185715
  episodes_this_iter: 122
  episodes_total: 31702
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6374128496646881
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,278,31807.4,2221776,0.171215,1.37987,-1.41024,64.7705


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8919072
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7878645322429029
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06374367016218545
    agent_0_total_ball_to_goal_speed_reward_min: -0.8618687899583741
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-35-54
  done: false
  episode_len_mean: 56.2
  episode_media: {}
  episode_reward_max: 1.615634210089558
  episode_reward_mean: 0.1805221414538844
  episode_reward_min: -0.7227515194310206
  episodes_this_iter: 140
  episodes_total: 31842
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6199939985275269
          entropy_coeff: 0.0
          kl: 0.019169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,279,31921.1,2229768,0.180522,1.61563,-0.722752,56.2


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8951040
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7838810317401008
    agent_0_total_ball_to_goal_speed_reward_mean: 0.068840286103585
    agent_0_total_ball_to_goal_speed_reward_min: -0.4278972853016015
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-37-47
  done: false
  episode_len_mean: 62.261538461538464
  episode_media: {}
  episode_reward_max: 1.7008147863413576
  episode_reward_mean: 0.16916473061089782
  episode_reward_min: -1.0065646041996665
  episodes_this_iter: 130
  episodes_total: 31972
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6315143015384674
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,280,32034,2237760,0.169165,1.70081,-1.00656,62.2615


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 8983008
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8290130164212968
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0517736391881413
    agent_0_total_ball_to_goal_speed_reward_min: -0.6063548209263947
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-39-44
  done: false
  episode_len_mean: 58.02158273381295
  episode_media: {}
  episode_reward_max: 2.0458542281426855
  episode_reward_mean: 0.13041544424407112
  episode_reward_min: -0.6023033458792253
  episodes_this_iter: 139
  episodes_total: 32111
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6244958691596985
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,281,32151.4,2245752,0.130415,2.04585,-0.602303,58.0216


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9014976
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7974422435385267
    agent_0_total_ball_to_goal_speed_reward_mean: 0.042191540634561736
    agent_0_total_ball_to_goal_speed_reward_min: -0.5830993012549824
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-41-39
  done: false
  episode_len_mean: 62.1796875
  episode_media: {}
  episode_reward_max: 1.3000848978557178
  episode_reward_mean: 0.10435491338065954
  episode_reward_min: -1.1615409233652272
  episodes_this_iter: 128
  episodes_total: 32239
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6362437653541565
          entropy_coeff: 0.0
          kl:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,282,32266.1,2253744,0.104355,1.30008,-1.16154,62.1797


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9046944
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8504510651939408
    agent_0_total_ball_to_goal_speed_reward_mean: 0.050825261435005764
    agent_0_total_ball_to_goal_speed_reward_min: -0.5496171843996188
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-43-39
  done: false
  episode_len_mean: 61.9296875
  episode_media: {}
  episode_reward_max: 1.3880316789207612
  episode_reward_mean: 0.13337580024324888
  episode_reward_min: -1.357220841203362
  episodes_this_iter: 128
  episodes_total: 32367
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6258971059322357
          entropy_coeff: 0.0
          kl: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,283,32385.9,2261736,0.133376,1.38803,-1.35722,61.9297


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9078912
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7497865535886857
    agent_0_total_ball_to_goal_speed_reward_mean: 0.059494234802636436
    agent_0_total_ball_to_goal_speed_reward_min: -0.7318373988321416
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-45-35
  done: false
  episode_len_mean: 61.51162790697674
  episode_media: {}
  episode_reward_max: 1.2867079403498676
  episode_reward_mean: 0.14102911063878035
  episode_reward_min: -0.9911518694876884
  episodes_this_iter: 129
  episodes_total: 32496
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6339413337707519
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,284,32502,2269728,0.141029,1.28671,-0.991152,61.5116


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9110880
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6806326283270259
    agent_0_total_ball_to_goal_speed_reward_mean: 0.040736193240381864
    agent_0_total_ball_to_goal_speed_reward_min: -0.7177364027199344
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-47-31
  done: false
  episode_len_mean: 65.6311475409836
  episode_media: {}
  episode_reward_max: 1.4244335985707397
  episode_reward_mean: 0.07714958867682399
  episode_reward_min: -0.7865293119592511
  episodes_this_iter: 122
  episodes_total: 32618
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6322631177902222
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,285,32618.1,2277720,0.0771496,1.42443,-0.786529,65.6311


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9142848
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6766398870740914
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04813107025549652
    agent_0_total_ball_to_goal_speed_reward_min: -0.627951043927362
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-49-26
  done: false
  episode_len_mean: 66.21138211382114
  episode_media: {}
  episode_reward_max: 1.4689435702888731
  episode_reward_mean: 0.1437265569763308
  episode_reward_min: -1.2234687053837696
  episodes_this_iter: 123
  episodes_total: 32741
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.631692667722702
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,286,32733.2,2285712,0.143727,1.46894,-1.22347,66.2114


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9174816
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8650116900088283
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05198219815522884
    agent_0_total_ball_to_goal_speed_reward_min: -0.5067628737662538
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-51-23
  done: false
  episode_len_mean: 64.232
  episode_media: {}
  episode_reward_max: 1.5821291682303154
  episode_reward_mean: 0.07605592212685827
  episode_reward_min: -0.8555544689316394
  episodes_this_iter: 125
  episodes_total: 32866
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6264503207206726
          entropy_coeff: 0.0
          kl: 0.01

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,287,32850.1,2293704,0.0760559,1.58213,-0.855554,64.232


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9206784
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6663904186999997
    agent_0_total_ball_to_goal_speed_reward_mean: 0.027165626184395216
    agent_0_total_ball_to_goal_speed_reward_min: -0.496366314852371
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-53-19
  done: false
  episode_len_mean: 60.77272727272727
  episode_media: {}
  episode_reward_max: 2.200783241502629
  episode_reward_mean: 0.1325790289392442
  episode_reward_min: -1.0746234329663324
  episodes_this_iter: 132
  episodes_total: 32998
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6268656358718873
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,288,32966.3,2301696,0.132579,2.20078,-1.07462,60.7727


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9238752
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8738484475266671
    agent_0_total_ball_to_goal_speed_reward_mean: 0.031321683995474435
    agent_0_total_ball_to_goal_speed_reward_min: -0.5472425054673437
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-55-16
  done: false
  episode_len_mean: 59.746153846153845
  episode_media: {}
  episode_reward_max: 1.5870515638365075
  episode_reward_mean: 0.07549601559977322
  episode_reward_min: -0.809115853449045
  episodes_this_iter: 130
  episodes_total: 33128
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.621432463169098
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,289,33082.7,2309688,0.075496,1.58705,-0.809116,59.7462


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9270720
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0720527198745264
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05556676199490437
    agent_0_total_ball_to_goal_speed_reward_min: -0.7625102653565271
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-57-13
  done: false
  episode_len_mean: 65.99186991869918
  episode_media: {}
  episode_reward_max: 1.3578227436177328
  episode_reward_mean: 0.1325792233389558
  episode_reward_min: -1.5116165947571563
  episodes_this_iter: 123
  episodes_total: 33251
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6220711541175842
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,290,33200.1,2317680,0.132579,1.35782,-1.51162,65.9919


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9302688
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6328641687304076
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06846751924309087
    agent_0_total_ball_to_goal_speed_reward_min: -0.3941988122864386
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-10_23-59-14
  done: false
  episode_len_mean: 67.49579831932773
  episode_media: {}
  episode_reward_max: 1.5075958075864058
  episode_reward_mean: 0.12231862582316592
  episode_reward_min: -0.7837935594964971
  episodes_this_iter: 119
  episodes_total: 33370
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.629621933221817
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,291,33320.7,2325672,0.122319,1.5076,-0.783794,67.4958


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9334656
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.920676887800925
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09650147999026241
    agent_0_total_ball_to_goal_speed_reward_min: -0.5842164977501771
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-01-10
  done: false
  episode_len_mean: 60.15555555555556
  episode_media: {}
  episode_reward_max: 1.3906545511467905
  episode_reward_mean: 0.18215317318336513
  episode_reward_min: -1.1175228835406843
  episodes_this_iter: 135
  episodes_total: 33505
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6233107702732086
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,292,33436.2,2333664,0.182153,1.39065,-1.11752,60.1556


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9366624
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8536680630775519
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0930438423725069
    agent_0_total_ball_to_goal_speed_reward_min: -0.52944089079449
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-03-05
  done: false
  episode_len_mean: 62.475806451612904
  episode_media: {}
  episode_reward_max: 1.4257623420737597
  episode_reward_mean: 0.16390998142446309
  episode_reward_min: -0.6976986354183721
  episodes_this_iter: 124
  episodes_total: 33629
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6244314968585968
          entropy_coeff: 0.0
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,293,33551.2,2341656,0.16391,1.42576,-0.697699,62.4758


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9398592
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7173659136450413
    agent_0_total_ball_to_goal_speed_reward_mean: 0.048037483362639105
    agent_0_total_ball_to_goal_speed_reward_min: -0.8785934290311014
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-05-00
  done: false
  episode_len_mean: 62.42063492063492
  episode_media: {}
  episode_reward_max: 1.6267566814357588
  episode_reward_mean: 0.14025463911586777
  episode_reward_min: -1.378383677111545
  episodes_this_iter: 126
  episodes_total: 33755
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6205927083492279
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,294,33666.4,2349648,0.140255,1.62676,-1.37838,62.4206


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9430560
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1581639329429396
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06348228847303398
    agent_0_total_ball_to_goal_speed_reward_min: -0.5053502281893423
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-06-55
  done: false
  episode_len_mean: 62.19847328244275
  episode_media: {}
  episode_reward_max: 1.5221710508451745
  episode_reward_mean: 0.11429030482678237
  episode_reward_min: -0.922410211763194
  episodes_this_iter: 131
  episodes_total: 33886
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6184326348304748
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,295,33781.5,2357640,0.11429,1.52217,-0.92241,62.1985


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9462528
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6327095048756181
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06418206302226635
    agent_0_total_ball_to_goal_speed_reward_min: -0.5380503184619537
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-08-51
  done: false
  episode_len_mean: 59.2
  episode_media: {}
  episode_reward_max: 1.5017484567983432
  episode_reward_mean: 0.19044415064641815
  episode_reward_min: -1.560300257978371
  episodes_this_iter: 135
  episodes_total: 34021
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6222650685310364
          entropy_coeff: 0.0
          kl: 0.01958

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,296,33897.3,2365632,0.190444,1.50175,-1.5603,59.2


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9494496
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8338946414253428
    agent_0_total_ball_to_goal_speed_reward_mean: 0.02941215958046121
    agent_0_total_ball_to_goal_speed_reward_min: -0.8869392949961852
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-10-52
  done: false
  episode_len_mean: 63.66393442622951
  episode_media: {}
  episode_reward_max: 1.141109056646624
  episode_reward_mean: 0.1322163011109317
  episode_reward_min: -1.010346674509538
  episodes_this_iter: 122
  episodes_total: 34143
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6165747122764588
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,297,34017.9,2373624,0.132216,1.14111,-1.01035,63.6639


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9526464
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.729445917693011
    agent_0_total_ball_to_goal_speed_reward_mean: 0.02819239051402832
    agent_0_total_ball_to_goal_speed_reward_min: -0.6672266231894434
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-12-50
  done: false
  episode_len_mean: 63.91338582677165
  episode_media: {}
  episode_reward_max: 1.1288700529024323
  episode_reward_mean: 0.03518974969077218
  episode_reward_min: -2.308015364212506
  episodes_this_iter: 127
  episodes_total: 34270
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.620395884513855
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,298,34136.4,2381616,0.0351897,1.12887,-2.30802,63.9134


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9558432
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6037821389513803
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04843206218570955
    agent_0_total_ball_to_goal_speed_reward_min: -0.6007495381321343
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-14-44
  done: false
  episode_len_mean: 62.40298507462686
  episode_media: {}
  episode_reward_max: 1.6384369749006815
  episode_reward_mean: 0.20897477595430483
  episode_reward_min: -0.970920953565247
  episodes_this_iter: 134
  episodes_total: 34404
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6166097514629364
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,299,34249.9,2389608,0.208975,1.63844,-0.970921,62.403


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9590400
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6680043616118752
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03820526869623546
    agent_0_total_ball_to_goal_speed_reward_min: -0.49989663344552765
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-16-39
  done: false
  episode_len_mean: 57.15107913669065
  episode_media: {}
  episode_reward_max: 1.411642440308475
  episode_reward_mean: 0.1798375848577445
  episode_reward_min: -1.2750371364469988
  episodes_this_iter: 139
  episodes_total: 34543
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6126400314569473
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,300,34365.3,2397600,0.179838,1.41164,-1.27504,57.1511


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9622368
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7414560678002079
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04717394883008761
    agent_0_total_ball_to_goal_speed_reward_min: -0.5114185948635197
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-18-36
  done: false
  episode_len_mean: 56.35507246376812
  episode_media: {}
  episode_reward_max: 1.6145761734229804
  episode_reward_mean: 0.13712067497741617
  episode_reward_min: -1.0184514885633997
  episodes_this_iter: 138
  episodes_total: 34681
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6193319497108459
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,301,34482.5,2405592,0.137121,1.61458,-1.01845,56.3551


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9654336
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8261584588859475
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0613541965861319
    agent_0_total_ball_to_goal_speed_reward_min: -0.9327935820823282
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-20-32
  done: false
  episode_len_mean: 59.25581395348837
  episode_media: {}
  episode_reward_max: 1.4555659134739884
  episode_reward_mean: 0.06546690352847861
  episode_reward_min: -2.5609359769390005
  episodes_this_iter: 129
  episodes_total: 34810
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6198442897796631
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,302,34597.7,2413584,0.0654669,1.45557,-2.56094,59.2558


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9686304
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.195578459280388
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06941468541878966
    agent_0_total_ball_to_goal_speed_reward_min: -0.594186228779082
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-22-29
  done: false
  episode_len_mean: 70.42241379310344
  episode_media: {}
  episode_reward_max: 1.7542245513247292
  episode_reward_mean: 0.2496538974093145
  episode_reward_min: -0.8143133051302933
  episodes_this_iter: 116
  episodes_total: 34926
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6211060428619385
          entropy_coeff: 0.0
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,303,34714.8,2421576,0.249654,1.75422,-0.814313,70.4224


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9718272
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9857802071684535
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09217527205203024
    agent_0_total_ball_to_goal_speed_reward_min: -0.5917649542893307
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-24-24
  done: false
  episode_len_mean: 64.7578125
  episode_media: {}
  episode_reward_max: 1.3903393401097577
  episode_reward_mean: 0.17878878040810264
  episode_reward_min: -1.0998270673986044
  episodes_this_iter: 128
  episodes_total: 35054
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.0125
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6237432647943497
          entropy_coeff: 0.0
          kl: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,304,34829.5,2429568,0.178789,1.39034,-1.09983,64.7578


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9750240
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8998015391773505
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05716896405920822
    agent_0_total_ball_to_goal_speed_reward_min: -0.692847974117064
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-26-22
  done: false
  episode_len_mean: 60.08661417322835
  episode_media: {}
  episode_reward_max: 1.224814612288053
  episode_reward_mean: 0.16998101535187532
  episode_reward_min: -1.1333485580858502
  episodes_this_iter: 127
  episodes_total: 35181
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6147405726909637
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,305,34947.5,2437560,0.169981,1.22481,-1.13335,60.0866


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9782208
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9737484928329673
    agent_0_total_ball_to_goal_speed_reward_mean: 0.052678226126439166
    agent_0_total_ball_to_goal_speed_reward_min: -0.5383289139816569
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-28-18
  done: false
  episode_len_mean: 63.11278195488722
  episode_media: {}
  episode_reward_max: 1.3260541983894578
  episode_reward_mean: 0.2635520122996095
  episode_reward_min: -1.117119736090896
  episodes_this_iter: 133
  episodes_total: 35314
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6130141174793243
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,306,35063.4,2445552,0.263552,1.32605,-1.11712,63.1128


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9814176
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9095034207051844
    agent_0_total_ball_to_goal_speed_reward_mean: 0.050425969867914956
    agent_0_total_ball_to_goal_speed_reward_min: -0.7150308299768452
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-30-13
  done: false
  episode_len_mean: 64.16260162601625
  episode_media: {}
  episode_reward_max: 1.127447667106674
  episode_reward_mean: 0.19230395571707462
  episode_reward_min: -1.1164243462052728
  episodes_this_iter: 123
  episodes_total: 35437
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6122920184135437
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,307,35178.9,2453544,0.192304,1.12745,-1.11642,64.1626


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9846144
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9168746855516415
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10491333790888283
    agent_0_total_ball_to_goal_speed_reward_min: -0.4625919688292012
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-32-08
  done: false
  episode_len_mean: 55.55172413793103
  episode_media: {}
  episode_reward_max: 1.3622539287891455
  episode_reward_mean: 0.2236288585114427
  episode_reward_min: -1.0518881613997744
  episodes_this_iter: 145
  episodes_total: 35582
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6096376724243164
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,308,35294.1,2461536,0.223629,1.36225,-1.05189,55.5517


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9878112
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8429393252265265
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09080899919752629
    agent_0_total_ball_to_goal_speed_reward_min: -0.7055993174737523
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-34-05
  done: false
  episode_len_mean: 64.58196721311475
  episode_media: {}
  episode_reward_max: 1.2350405732747656
  episode_reward_mean: 0.2068716703660121
  episode_reward_min: -0.8379069690577383
  episodes_this_iter: 122
  episodes_total: 35704
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6181747789382934
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,309,35410.6,2469528,0.206872,1.23504,-0.837907,64.582


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9910080
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0135121103352027
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09251787522342957
    agent_0_total_ball_to_goal_speed_reward_min: -0.4793350086992086
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-36-00
  done: false
  episode_len_mean: 67.73504273504274
  episode_media: {}
  episode_reward_max: 1.3080560212963752
  episode_reward_mean: 0.17598901056224983
  episode_reward_min: -0.7793561367567827
  episodes_this_iter: 117
  episodes_total: 35821
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.615296306848526
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,310,35525.8,2477520,0.175989,1.30806,-0.779356,67.735


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9942048
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7516175473084983
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04666462576288718
    agent_0_total_ball_to_goal_speed_reward_min: -0.7928606709075302
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-37-56
  done: false
  episode_len_mean: 65.984
  episode_media: {}
  episode_reward_max: 1.5547533393571018
  episode_reward_mean: 0.01348321106210295
  episode_reward_min: -1.7029173887746234
  episodes_this_iter: 125
  episodes_total: 35946
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6112587840557099
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,311,35641.1,2485512,0.0134832,1.55475,-1.70292,65.984


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 9974016
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9173645796092771
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06791037399224194
    agent_0_total_ball_to_goal_speed_reward_min: -0.5561511297624347
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-39-51
  done: false
  episode_len_mean: 62.072
  episode_media: {}
  episode_reward_max: 1.4985053980638217
  episode_reward_mean: 0.15985984809409934
  episode_reward_min: -0.7522163642924785
  episodes_this_iter: 125
  episodes_total: 36071
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6155141098499298
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,312,35756.7,2493504,0.15986,1.49851,-0.752216,62.072


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10005984
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7586527167772058
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05698774340540993
    agent_0_total_ball_to_goal_speed_reward_min: -0.460454956975032
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-41-50
  done: false
  episode_len_mean: 65.31451612903226
  episode_media: {}
  episode_reward_max: 1.2638573751080862
  episode_reward_mean: 0.21874024771076558
  episode_reward_min: -0.743132246055346
  episodes_this_iter: 124
  episodes_total: 36195
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6145706720352173
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,313,35875,2501496,0.21874,1.26386,-0.743132,65.3145


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10037952
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9916192059876803
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09812419058781331
    agent_0_total_ball_to_goal_speed_reward_min: -0.5948850914900251
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-43-48
  done: false
  episode_len_mean: 68.16964285714286
  episode_media: {}
  episode_reward_max: 1.1991877293556805
  episode_reward_mean: 0.19912999231324588
  episode_reward_min: -0.7636649095671326
  episodes_this_iter: 112
  episodes_total: 36307
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6211894288063049
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,314,35993.5,2509488,0.19913,1.19919,-0.763665,68.1696


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10069920
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.010636930951309
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03751899157103175
    agent_0_total_ball_to_goal_speed_reward_min: -0.6819372789261404
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-45-52
  done: false
  episode_len_mean: 66.8267716535433
  episode_media: {}
  episode_reward_max: 1.4963308997750722
  episode_reward_mean: 0.13838289499300285
  episode_reward_min: -0.9500926583503237
  episodes_this_iter: 127
  episodes_total: 36434
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6134092404842376
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,315,36116.7,2517480,0.138383,1.49633,-0.950093,66.8268


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10101888
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0973917322947322
    agent_0_total_ball_to_goal_speed_reward_mean: 0.038709725709719145
    agent_0_total_ball_to_goal_speed_reward_min: -0.47858304469556917
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-47-48
  done: false
  episode_len_mean: 63.07086614173228
  episode_media: {}
  episode_reward_max: 1.8215389886910303
  episode_reward_mean: 0.15898771405263343
  episode_reward_min: -0.9115507706449764
  episodes_this_iter: 127
  episodes_total: 36561
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6056524639129639
          entropy_coe

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,316,36233.6,2525472,0.158988,1.82154,-0.911551,63.0709


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10133856
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6676143419293513
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04763386070036245
    agent_0_total_ball_to_goal_speed_reward_min: -0.6131085264821031
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-49-46
  done: false
  episode_len_mean: 57.85606060606061
  episode_media: {}
  episode_reward_max: 1.3061357196021435
  episode_reward_mean: 0.1570030144102402
  episode_reward_min: -0.9450875312822589
  episodes_this_iter: 132
  episodes_total: 36693
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6156780698299408
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,317,36350.9,2533464,0.157003,1.30614,-0.945088,57.8561


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10165824
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.870001288107234
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06012557760378779
    agent_0_total_ball_to_goal_speed_reward_min: -0.5362000665686273
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-51-45
  done: false
  episode_len_mean: 65.39516129032258
  episode_media: {}
  episode_reward_max: 1.7810436736718365
  episode_reward_mean: 0.15525471955763204
  episode_reward_min: -1.6209357193152396
  episodes_this_iter: 124
  episodes_total: 36817
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6061108772754669
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,318,36470,2541456,0.155255,1.78104,-1.62094,65.3952


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10197792
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7455352058492947
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05320791419833695
    agent_0_total_ball_to_goal_speed_reward_min: -0.6799727963014541
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-53-43
  done: false
  episode_len_mean: 67.2892561983471
  episode_media: {}
  episode_reward_max: 1.238872112584924
  episode_reward_mean: 0.11686421765040278
  episode_reward_min: -1.3612677663989894
  episodes_this_iter: 121
  episodes_total: 36938
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6068947141170502
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,319,36588,2549448,0.116864,1.23887,-1.36127,67.2893


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10229760
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0104810058666354
    agent_0_total_ball_to_goal_speed_reward_mean: 0.055674387246210995
    agent_0_total_ball_to_goal_speed_reward_min: -0.6749102448451515
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-55-41
  done: false
  episode_len_mean: 68.98181818181818
  episode_media: {}
  episode_reward_max: 1.441816840363626
  episode_reward_mean: 0.17687993133561994
  episode_reward_min: -1.0783754127998706
  episodes_this_iter: 110
  episodes_total: 37048
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6168802881240845
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,320,36705.8,2557440,0.17688,1.44182,-1.07838,68.9818


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10261728
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8878553297301713
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08918770461745032
    agent_0_total_ball_to_goal_speed_reward_min: -0.6548058313899651
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-57-36
  done: false
  episode_len_mean: 59.31386861313869
  episode_media: {}
  episode_reward_max: 1.3955474479760737
  episode_reward_mean: 0.1786604089120061
  episode_reward_min: -0.95324604777504
  episodes_this_iter: 137
  episodes_total: 37185
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6016603008508682
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,321,36821.1,2565432,0.17866,1.39555,-0.953246,59.3139


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10293696
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8501936987485846
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08712181524869887
    agent_0_total_ball_to_goal_speed_reward_min: -0.8280550760785268
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_00-59-32
  done: false
  episode_len_mean: 68.30508474576271
  episode_media: {}
  episode_reward_max: 1.2013086292547719
  episode_reward_mean: 0.14526262521870775
  episode_reward_min: -0.9820026573690015
  episodes_this_iter: 118
  episodes_total: 37303
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.605624504327774
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,322,36936.4,2573424,0.145263,1.20131,-0.982003,68.3051


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10325664
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.984207796867755
    agent_0_total_ball_to_goal_speed_reward_mean: 0.11995307008335299
    agent_0_total_ball_to_goal_speed_reward_min: -0.48802043840960624
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-01-28
  done: false
  episode_len_mean: 69.82456140350877
  episode_media: {}
  episode_reward_max: 1.526211686471163
  episode_reward_mean: 0.12595076019912166
  episode_reward_min: -1.669207424964832
  episodes_this_iter: 114
  episodes_total: 37417
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6081269822120666
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,323,37052.3,2581416,0.125951,1.52621,-1.66921,69.8246


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10357632
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.007747157577559
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09245938465312627
    agent_0_total_ball_to_goal_speed_reward_min: -0.40110317382603
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-03-23
  done: false
  episode_len_mean: 61.20472440944882
  episode_media: {}
  episode_reward_max: 1.1789179417757332
  episode_reward_mean: 0.14957918345838317
  episode_reward_min: -0.6244427189116237
  episodes_this_iter: 127
  episodes_total: 37544
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.607130126953125
          entropy_coeff: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,324,37167.9,2589408,0.149579,1.17892,-0.624443,61.2047


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10389600
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6437946554135335
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04769507430677154
    agent_0_total_ball_to_goal_speed_reward_min: -0.7054419386643433
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-05-19
  done: false
  episode_len_mean: 54.83225806451613
  episode_media: {}
  episode_reward_max: 1.5111068679696067
  episode_reward_mean: 0.14105785551460526
  episode_reward_min: -1.2138547740869523
  episodes_this_iter: 155
  episodes_total: 37699
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6049013195037842
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,325,37283.1,2597400,0.141058,1.51111,-1.21385,54.8323


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10421568
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9207833189309222
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06262392326266625
    agent_0_total_ball_to_goal_speed_reward_min: -0.6019466042081707
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-07-14
  done: false
  episode_len_mean: 57.948905109489054
  episode_media: {}
  episode_reward_max: 1.6657449965948583
  episode_reward_mean: 0.11245271186384465
  episode_reward_min: -1.087569169744786
  episodes_this_iter: 137
  episodes_total: 37836
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.60441858959198
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,326,37398.3,2605392,0.112453,1.66574,-1.08757,57.9489


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10453536
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9829979348517226
    agent_0_total_ball_to_goal_speed_reward_mean: 0.1010902261454762
    agent_0_total_ball_to_goal_speed_reward_min: -0.5480203131490583
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-09-09
  done: false
  episode_len_mean: 59.7557251908397
  episode_media: {}
  episode_reward_max: 1.6517624077131088
  episode_reward_mean: 0.18758755505948727
  episode_reward_min: -1.4573158509822388
  episodes_this_iter: 131
  episodes_total: 37967
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6047462702989578
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,327,37513.8,2613384,0.187588,1.65176,-1.45732,59.7557


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10485504
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0570166058669461
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0640952742911989
    agent_0_total_ball_to_goal_speed_reward_min: -0.5601484075129457
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-11-07
  done: false
  episode_len_mean: 74.34862385321101
  episode_media: {}
  episode_reward_max: 1.5344425873058505
  episode_reward_mean: 0.1361162731344884
  episode_reward_min: -1.1824868888377025
  episodes_this_iter: 109
  episodes_total: 38076
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6117045929431916
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,328,37631.5,2621376,0.136116,1.53444,-1.18249,74.3486


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10517472
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6834690920930631
    agent_0_total_ball_to_goal_speed_reward_mean: 0.11997519656233424
    agent_0_total_ball_to_goal_speed_reward_min: -0.491622778871283
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-13-05
  done: false
  episode_len_mean: 57.391304347826086
  episode_media: {}
  episode_reward_max: 1.3498097447236554
  episode_reward_mean: 0.2695890338447144
  episode_reward_min: -0.6318386336194202
  episodes_this_iter: 138
  episodes_total: 38214
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6067900562286377
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,329,37749.3,2629368,0.269589,1.34981,-0.631839,57.3913


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10549440
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9790884335550301
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07733467591301717
    agent_0_total_ball_to_goal_speed_reward_min: -0.5221744168096752
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-15-01
  done: false
  episode_len_mean: 68.77118644067797
  episode_media: {}
  episode_reward_max: 2.222302626313242
  episode_reward_mean: 0.15745596053596514
  episode_reward_min: -1.1842916129952754
  episodes_this_iter: 118
  episodes_total: 38332
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6043518148660659
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,330,37865,2637360,0.157456,2.2223,-1.18429,68.7712


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10581408
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7844718922665291
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07014854910174524
    agent_0_total_ball_to_goal_speed_reward_min: -0.4814728937641011
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-16-59
  done: false
  episode_len_mean: 61.768
  episode_media: {}
  episode_reward_max: 1.753651598160579
  episode_reward_mean: 0.20080064396031755
  episode_reward_min: -1.1092932473498252
  episodes_this_iter: 125
  episodes_total: 38457
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6050465292930604
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,331,37983.3,2645352,0.200801,1.75365,-1.10929,61.768


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10613376
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8535390599338128
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07648741793668658
    agent_0_total_ball_to_goal_speed_reward_min: -0.8573626028233414
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-18-55
  done: false
  episode_len_mean: 61.35606060606061
  episode_media: {}
  episode_reward_max: 1.3623669730140358
  episode_reward_mean: 0.1732586841656039
  episode_reward_min: -1.2359448802671933
  episodes_this_iter: 132
  episodes_total: 38589
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6036608227491379
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,332,38099,2653344,0.173259,1.36237,-1.23594,61.3561


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10645344
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.822761661174116
    agent_0_total_ball_to_goal_speed_reward_mean: 0.055179721717348265
    agent_0_total_ball_to_goal_speed_reward_min: -0.6918030322533422
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-20-51
  done: false
  episode_len_mean: 58.76428571428571
  episode_media: {}
  episode_reward_max: 1.2195183005169512
  episode_reward_mean: 0.11754821184007273
  episode_reward_min: -1.1081508817827252
  episodes_this_iter: 140
  episodes_total: 38729
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6067256124019623
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,333,38215.4,2661336,0.117548,1.21952,-1.10815,58.7643


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10677312
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7739811875708207
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07448150295645294
    agent_0_total_ball_to_goal_speed_reward_min: -0.7277274848711217
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-22-47
  done: false
  episode_len_mean: 56.18840579710145
  episode_media: {}
  episode_reward_max: 1.2311174645928942
  episode_reward_mean: 0.1403161926139612
  episode_reward_min: -1.6482556742340062
  episodes_this_iter: 138
  episodes_total: 38867
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6096283428668976
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,334,38331.1,2669328,0.140316,1.23112,-1.64826,56.1884


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10709280
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1216185960212202
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04410079533280974
    agent_0_total_ball_to_goal_speed_reward_min: -0.5938215687887557
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-24-43
  done: false
  episode_len_mean: 65.09836065573771
  episode_media: {}
  episode_reward_max: 1.61472679038119
  episode_reward_mean: 0.12646029476581575
  episode_reward_min: -1.4718890006132415
  episodes_this_iter: 122
  episodes_total: 38989
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6029817938804627
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,335,38447,2677320,0.12646,1.61473,-1.47189,65.0984


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10741248
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6861053354966625
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05525365094532109
    agent_0_total_ball_to_goal_speed_reward_min: -0.5415436427783509
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-26-39
  done: false
  episode_len_mean: 65.77235772357723
  episode_media: {}
  episode_reward_max: 1.1861904223281075
  episode_reward_mean: 0.11228576248831905
  episode_reward_min: -0.8258640120725929
  episodes_this_iter: 123
  episodes_total: 39112
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6060828392505646
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,336,38563.1,2685312,0.112286,1.18619,-0.825864,65.7724


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10773216
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9904974508332337
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05842245862334817
    agent_0_total_ball_to_goal_speed_reward_min: -0.5677232608215772
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-28-35
  done: false
  episode_len_mean: 62.09848484848485
  episode_media: {}
  episode_reward_max: 1.7770086255697022
  episode_reward_mean: 0.1469592189725251
  episode_reward_min: -1.3112768489508637
  episodes_this_iter: 132
  episodes_total: 39244
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6059355555772782
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,337,38678.7,2693304,0.146959,1.77701,-1.31128,62.0985


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10805184
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8062494591079352
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07183475062657682
    agent_0_total_ball_to_goal_speed_reward_min: -0.5036952548606654
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-30-31
  done: false
  episode_len_mean: 65.38655462184875
  episode_media: {}
  episode_reward_max: 1.7427510736656489
  episode_reward_mean: 0.15458456733049514
  episode_reward_min: -1.4514950119892038
  episodes_this_iter: 119
  episodes_total: 39363
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.604353376030922
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,338,38794.6,2701296,0.154585,1.74275,-1.4515,65.3866


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10837152
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0405050192279557
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08357102967939478
    agent_0_total_ball_to_goal_speed_reward_min: -0.8341510728177955
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-32-27
  done: false
  episode_len_mean: 57.40875912408759
  episode_media: {}
  episode_reward_max: 1.815517026623756
  episode_reward_mean: 0.14296203883461933
  episode_reward_min: -1.119498342206851
  episodes_this_iter: 137
  episodes_total: 39500
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.600850216627121
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,339,38910.5,2709288,0.142962,1.81552,-1.1195,57.4088


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10869120
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7943654849162384
    agent_0_total_ball_to_goal_speed_reward_mean: 0.085475980952915
    agent_0_total_ball_to_goal_speed_reward_min: -0.7956146818041298
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-34-23
  done: false
  episode_len_mean: 65.1015625
  episode_media: {}
  episode_reward_max: 1.978207476145279
  episode_reward_mean: 0.18456206423020752
  episode_reward_min: -1.7485493163026535
  episodes_this_iter: 128
  episodes_total: 39628
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6003192105293274
          entropy_coeff: 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,340,39026.4,2717280,0.184562,1.97821,-1.74855,65.1016


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10901088
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7932959860338659
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05947630933710907
    agent_0_total_ball_to_goal_speed_reward_min: -0.6164662724448877
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-36-18
  done: false
  episode_len_mean: 56.286764705882355
  episode_media: {}
  episode_reward_max: 1.2462708614144602
  episode_reward_mean: 0.061147409348625205
  episode_reward_min: -1.4987961506009637
  episodes_this_iter: 136
  episodes_total: 39764
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6046560943126679
          entropy_coe

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,341,39141.7,2725272,0.0611474,1.24627,-1.4988,56.2868


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10933056
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8624891797321591
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06980710638565772
    agent_0_total_ball_to_goal_speed_reward_min: -0.45481095765787694
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-38-14
  done: false
  episode_len_mean: 62.35658914728682
  episode_media: {}
  episode_reward_max: 1.3880914603956138
  episode_reward_mean: 0.17290341217655886
  episode_reward_min: -0.7295368721523623
  episodes_this_iter: 129
  episodes_total: 39893
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6015405638217926
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,342,39257.6,2733264,0.172903,1.38809,-0.729537,62.3566


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10965024
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8465113175661968
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0984489001834443
    agent_0_total_ball_to_goal_speed_reward_min: -0.6780400544040044
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-40-09
  done: false
  episode_len_mean: 67.34426229508196
  episode_media: {}
  episode_reward_max: 1.709128572505065
  episode_reward_mean: 0.22099015769736036
  episode_reward_min: -1.5909399522163181
  episodes_this_iter: 122
  episodes_total: 40015
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6044762687683105
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,343,39372.5,2741256,0.22099,1.70913,-1.59094,67.3443


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 10996992
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.03171607072264
    agent_0_total_ball_to_goal_speed_reward_mean: 0.051009274682379695
    agent_0_total_ball_to_goal_speed_reward_min: -0.4635782502755382
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-42-05
  done: false
  episode_len_mean: 61.49586776859504
  episode_media: {}
  episode_reward_max: 1.4791133429049672
  episode_reward_mean: 0.13545943438588576
  episode_reward_min: -1.216097564725404
  episodes_this_iter: 121
  episodes_total: 40136
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6084787542819977
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,344,39488.2,2749248,0.135459,1.47911,-1.2161,61.4959


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11028960
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.6465565868688852
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05746546910239865
    agent_0_total_ball_to_goal_speed_reward_min: -0.5060636889329511
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-44-00
  done: false
  episode_len_mean: 69.1880341880342
  episode_media: {}
  episode_reward_max: 1.8846776717972107
  episode_reward_mean: 0.09283881491602182
  episode_reward_min: -0.8856573964730641
  episodes_this_iter: 117
  episodes_total: 40253
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6098433678150177
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,345,39603.5,2757240,0.0928388,1.88468,-0.885657,69.188


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11060928
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8436330147081503
    agent_0_total_ball_to_goal_speed_reward_mean: 0.042321771083618356
    agent_0_total_ball_to_goal_speed_reward_min: -0.5110827623466159
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-45-58
  done: false
  episode_len_mean: 71.38392857142857
  episode_media: {}
  episode_reward_max: 1.5916624377164816
  episode_reward_mean: 0.09065002277333352
  episode_reward_min: -0.9976720384391093
  episodes_this_iter: 112
  episodes_total: 40365
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6069671516418457
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,346,39721.2,2765232,0.09065,1.59166,-0.997672,71.3839


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11092896
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.6563421720631766
    agent_0_total_ball_to_goal_speed_reward_mean: 0.1153308416619956
    agent_0_total_ball_to_goal_speed_reward_min: -0.47056326698515555
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-47-55
  done: false
  episode_len_mean: 67.02459016393442
  episode_media: {}
  episode_reward_max: 1.6167722218268512
  episode_reward_mean: 0.2161025005653562
  episode_reward_min: -1.0110287977742467
  episodes_this_iter: 122
  episodes_total: 40487
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6051875500679016
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,347,39837.8,2773224,0.216103,1.61677,-1.01103,67.0246


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11124864
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9123663229061905
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0727844929493738
    agent_0_total_ball_to_goal_speed_reward_min: -0.46238523971079615
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-49-50
  done: false
  episode_len_mean: 65.50806451612904
  episode_media: {}
  episode_reward_max: 1.6701255281063623
  episode_reward_mean: 0.1662494768048991
  episode_reward_min: -0.6793200572786229
  episodes_this_iter: 124
  episodes_total: 40611
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6002937951087952
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,348,39953.3,2781216,0.166249,1.67013,-0.67932,65.5081


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11156832
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7155538030041088
    agent_0_total_ball_to_goal_speed_reward_mean: 0.050960430953419344
    agent_0_total_ball_to_goal_speed_reward_min: -0.4637088263293162
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-51-47
  done: false
  episode_len_mean: 69.34210526315789
  episode_media: {}
  episode_reward_max: 1.6446867398079619
  episode_reward_mean: 0.0829655796800195
  episode_reward_min: -1.2675969442623671
  episodes_this_iter: 114
  episodes_total: 40725
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6068609722852707
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,349,40069.8,2789208,0.0829656,1.64469,-1.2676,69.3421


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11188800
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8434041318822915
    agent_0_total_ball_to_goal_speed_reward_mean: 0.11060535800682135
    agent_0_total_ball_to_goal_speed_reward_min: -0.48730050253970175
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-53-42
  done: false
  episode_len_mean: 67.31404958677686
  episode_media: {}
  episode_reward_max: 2.0434803486482194
  episode_reward_mean: 0.29356703934743
  episode_reward_min: -1.1656867256105823
  episodes_this_iter: 121
  episodes_total: 40846
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6012043740749359
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,350,40185.2,2797200,0.293567,2.04348,-1.16569,67.314


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11220768
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8933564057627362
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04295263563656775
    agent_0_total_ball_to_goal_speed_reward_min: -0.47399934472299754
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-55-38
  done: false
  episode_len_mean: 66.40495867768595
  episode_media: {}
  episode_reward_max: 1.2698867022442895
  episode_reward_mean: 0.15099346894618768
  episode_reward_min: -0.9469262302398926
  episodes_this_iter: 121
  episodes_total: 40967
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6109227290153504
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,351,40300.9,2805192,0.150993,1.26989,-0.946926,66.405


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11252736
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.3123967429387389
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04295094287194961
    agent_0_total_ball_to_goal_speed_reward_min: -0.4796522333593026
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-57-34
  done: false
  episode_len_mean: 62.651162790697676
  episode_media: {}
  episode_reward_max: 2.0910477520296755
  episode_reward_mean: 0.15442425380939967
  episode_reward_min: -0.8911056760736846
  episodes_this_iter: 129
  episodes_total: 41096
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6051414952278137
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,352,40416.9,2813184,0.154424,2.09105,-0.891106,62.6512


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11284704
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9797709788423491
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09311470250481237
    agent_0_total_ball_to_goal_speed_reward_min: -0.5437315263221159
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_01-59-31
  done: false
  episode_len_mean: 67.99152542372882
  episode_media: {}
  episode_reward_max: 1.7681630081351676
  episode_reward_mean: 0.24542403125302964
  episode_reward_min: -1.1154178636844572
  episodes_this_iter: 118
  episodes_total: 41214
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6024216417074203
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,353,40533.1,2821176,0.245424,1.76816,-1.11542,67.9915


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11316672
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9090284928457413
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09373254707963755
    agent_0_total_ball_to_goal_speed_reward_min: -0.6493175096144819
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-01-41
  done: false
  episode_len_mean: 60.76190476190476
  episode_media: {}
  episode_reward_max: 1.7005087334509383
  episode_reward_mean: 0.19340224333346054
  episode_reward_min: -0.9281735524248573
  episodes_this_iter: 126
  episodes_total: 41340
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6050345398187638
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,354,40663.4,2829168,0.193402,1.70051,-0.928174,60.7619


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11348640
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1417768052510435
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08827932993861494
    agent_0_total_ball_to_goal_speed_reward_min: -0.5250392049447361
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-03-41
  done: false
  episode_len_mean: 57.572463768115945
  episode_media: {}
  episode_reward_max: 1.6847916633854982
  episode_reward_mean: 0.1463275064549576
  episode_reward_min: -0.86345661680879
  episodes_this_iter: 138
  episodes_total: 41478
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6041019450426102
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,355,40783,2837160,0.146328,1.68479,-0.863457,57.5725


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11380608
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9833956404091855
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07027617047259463
    agent_0_total_ball_to_goal_speed_reward_min: -0.6599216080567465
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-05-50
  done: false
  episode_len_mean: 63.359375
  episode_media: {}
  episode_reward_max: 1.4320581015764884
  episode_reward_mean: 0.2193473566618028
  episode_reward_min: -0.9732241534790322
  episodes_this_iter: 128
  episodes_total: 41606
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.598079285621643
          entropy_coeff: 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,356,40912.4,2845152,0.219347,1.43206,-0.973224,63.3594


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11412576
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9946551040016144
    agent_0_total_ball_to_goal_speed_reward_mean: 0.13287148630122453
    agent_0_total_ball_to_goal_speed_reward_min: -0.5421125312385419
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-07-59
  done: false
  episode_len_mean: 65.39495798319328
  episode_media: {}
  episode_reward_max: 1.4715271947789283
  episode_reward_mean: 0.25504947178680365
  episode_reward_min: -1.0177386606221304
  episodes_this_iter: 119
  episodes_total: 41725
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5947714298963547
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,357,41041.3,2853144,0.255049,1.47153,-1.01774,65.395


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11444544
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8583898725765525
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06387903194788047
    agent_0_total_ball_to_goal_speed_reward_min: -0.6137419037327977
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-10-08
  done: false
  episode_len_mean: 61.53623188405797
  episode_media: {}
  episode_reward_max: 1.2508879950192269
  episode_reward_mean: 0.16753227171872564
  episode_reward_min: -1.0463403212202658
  episodes_this_iter: 138
  episodes_total: 41863
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5975227875709533
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,358,41170.4,2861136,0.167532,1.25089,-1.04634,61.5362


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11476512
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8412439077950478
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03102725095965949
    agent_0_total_ball_to_goal_speed_reward_min: -0.6697514908679637
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-12-16
  done: false
  episode_len_mean: 65.63865546218487
  episode_media: {}
  episode_reward_max: 1.5358513916045093
  episode_reward_mean: 0.10451131987732167
  episode_reward_min: -1.1802023730652378
  episodes_this_iter: 119
  episodes_total: 41982
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5977951563596725
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,359,41297.7,2869128,0.104511,1.53585,-1.1802,65.6387


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11508480
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8498326678913407
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05503760545607696
    agent_0_total_ball_to_goal_speed_reward_min: -0.5214307685296836
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-14-23
  done: false
  episode_len_mean: 64.9920634920635
  episode_media: {}
  episode_reward_max: 1.587183284391611
  episode_reward_mean: 0.22149663271366044
  episode_reward_min: -0.8343752100326411
  episodes_this_iter: 126
  episodes_total: 42108
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.599666476726532
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,360,41424.8,2877120,0.221497,1.58718,-0.834375,64.9921


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11540448
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8282101679560361
    agent_0_total_ball_to_goal_speed_reward_mean: 0.058383639053859994
    agent_0_total_ball_to_goal_speed_reward_min: -0.43114760083727566
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-16-23
  done: false
  episode_len_mean: 64.05833333333334
  episode_media: {}
  episode_reward_max: 1.6031430611474327
  episode_reward_mean: 0.16867421713097103
  episode_reward_min: -0.7271426014028255
  episodes_this_iter: 120
  episodes_total: 42228
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6044450387954712
          entropy_coe

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,361,41545.2,2885112,0.168674,1.60314,-0.727143,64.0583


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11572416
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9227841749643493
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0853493381458455
    agent_0_total_ball_to_goal_speed_reward_min: -0.4865132477012601
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-18-22
  done: false
  episode_len_mean: 71.25233644859813
  episode_media: {}
  episode_reward_max: 1.2915931187402379
  episode_reward_mean: 0.19141879461233885
  episode_reward_min: -0.8894253035764997
  episodes_this_iter: 107
  episodes_total: 42335
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6025840833187103
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,362,41663.4,2893104,0.191419,1.29159,-0.889425,71.2523


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11604384
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.5353519665957254
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08342752162317325
    agent_0_total_ball_to_goal_speed_reward_min: -0.5314912012271967
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-20-28
  done: false
  episode_len_mean: 67.8
  episode_media: {}
  episode_reward_max: 2.0800073830087964
  episode_reward_mean: 0.22977899215047806
  episode_reward_min: -1.376346460703345
  episodes_this_iter: 125
  episodes_total: 42460
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5939208394289017
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,363,41789.7,2901096,0.229779,2.08001,-1.37635,67.8


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11636352
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9344977564237367
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05045248779622813
    agent_0_total_ball_to_goal_speed_reward_min: -0.5343014989432104
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-22-33
  done: false
  episode_len_mean: 66.16666666666667
  episode_media: {}
  episode_reward_max: 1.4075747487337924
  episode_reward_mean: 0.14400746827153757
  episode_reward_min: -1.4473433186792244
  episodes_this_iter: 120
  episodes_total: 42580
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5974507921934128
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,364,41914.7,2909088,0.144007,1.40757,-1.44734,66.1667


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11668320
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9436376015715582
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05604960089849741
    agent_0_total_ball_to_goal_speed_reward_min: -0.5041500047318722
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-24-40
  done: false
  episode_len_mean: 63.65384615384615
  episode_media: {}
  episode_reward_max: 1.6174206387492096
  episode_reward_mean: 0.24769790180078677
  episode_reward_min: -1.032106283195155
  episodes_this_iter: 130
  episodes_total: 42710
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5937744137048722
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,365,42041.7,2917080,0.247698,1.61742,-1.03211,63.6538


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11700288
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9110840927974329
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06713375477400263
    agent_0_total_ball_to_goal_speed_reward_min: -0.5780770633963273
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-26-43
  done: false
  episode_len_mean: 63.37903225806452
  episode_media: {}
  episode_reward_max: 1.5648714749215853
  episode_reward_mean: 0.18160156755053
  episode_reward_min: -1.3033669481428036
  episodes_this_iter: 124
  episodes_total: 42834
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6018578159809113
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,366,42164.3,2925072,0.181602,1.56487,-1.30337,63.379


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11732256
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.6159515687999289
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09181400138198642
    agent_0_total_ball_to_goal_speed_reward_min: -0.5633025916076921
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-28-45
  done: false
  episode_len_mean: 68.78947368421052
  episode_media: {}
  episode_reward_max: 1.9696195050257534
  episode_reward_mean: 0.18107902155622196
  episode_reward_min: -1.3086538070413236
  episodes_this_iter: 114
  episodes_total: 42948
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6037188150882721
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,367,42286,2933064,0.181079,1.96962,-1.30865,68.7895


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11764224
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8351128530331011
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09011303968551508
    agent_0_total_ball_to_goal_speed_reward_min: -0.5697739355033655
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-30-52
  done: false
  episode_len_mean: 63.80487804878049
  episode_media: {}
  episode_reward_max: 1.1236753430889264
  episode_reward_mean: 0.16073978534577377
  episode_reward_min: -1.0459257155418495
  episodes_this_iter: 123
  episodes_total: 43071
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5949693827629089
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,368,42413.7,2941056,0.16074,1.12368,-1.04593,63.8049


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11796192
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.092624380780552
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07105126157616555
    agent_0_total_ball_to_goal_speed_reward_min: -0.48841514323550556
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-33-00
  done: false
  episode_len_mean: 65.24409448818898
  episode_media: {}
  episode_reward_max: 2.5591442194921754
  episode_reward_mean: 0.25704144082523106
  episode_reward_min: -0.8373800039745722
  episodes_this_iter: 127
  episodes_total: 43198
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.585382976770401
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,369,42541,2949048,0.257041,2.55914,-0.83738,65.2441


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11828160
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8526647846351637
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05373671040207177
    agent_0_total_ball_to_goal_speed_reward_min: -0.6252873291526687
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-34-59
  done: false
  episode_len_mean: 63.04838709677419
  episode_media: {}
  episode_reward_max: 1.9233153991985712
  episode_reward_mean: 0.17234923364742089
  episode_reward_min: -1.2216396743696007
  episodes_this_iter: 124
  episodes_total: 43322
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5985433623790741
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,370,42659.9,2957040,0.172349,1.92332,-1.22164,63.0484


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11860128
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9422092530159815
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06909247379979375
    agent_0_total_ball_to_goal_speed_reward_min: -0.507148925903228
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-36-55
  done: false
  episode_len_mean: 60.53488372093023
  episode_media: {}
  episode_reward_max: 1.7167718737933413
  episode_reward_mean: 0.20704620985167135
  episode_reward_min: -0.9998796880381153
  episodes_this_iter: 129
  episodes_total: 43451
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5974526815414428
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,371,42775.8,2965032,0.207046,1.71677,-0.99988,60.5349


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11892096
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8686839403700796
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04998349947960845
    agent_0_total_ball_to_goal_speed_reward_min: -0.5393437113059093
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-38-50
  done: false
  episode_len_mean: 66.2741935483871
  episode_media: {}
  episode_reward_max: 1.3417179407245294
  episode_reward_mean: 0.19448641739461667
  episode_reward_min: -1.5099799588630611
  episodes_this_iter: 124
  episodes_total: 43575
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.594753647685051
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,372,42891.4,2973024,0.194486,1.34172,-1.50998,66.2742


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11924064
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7178195103179922
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08353864919917377
    agent_0_total_ball_to_goal_speed_reward_min: -0.559238132734965
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-40-48
  done: false
  episode_len_mean: 64.73333333333333
  episode_media: {}
  episode_reward_max: 1.5472664855209581
  episode_reward_mean: 0.17852699669917918
  episode_reward_min: -0.8313981327670172
  episodes_this_iter: 120
  episodes_total: 43695
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5913634233474732
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,373,43008.9,2981016,0.178527,1.54727,-0.831398,64.7333


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11956032
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9549316036851022
    agent_0_total_ball_to_goal_speed_reward_mean: 0.018249967002112674
    agent_0_total_ball_to_goal_speed_reward_min: -0.7945101106879469
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-42-44
  done: false
  episode_len_mean: 66.28688524590164
  episode_media: {}
  episode_reward_max: 1.3319034443188924
  episode_reward_mean: 0.09064684207369357
  episode_reward_min: -2.0354467349400944
  episodes_this_iter: 122
  episodes_total: 43817
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5986401250362396
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,374,43125.1,2989008,0.0906468,1.3319,-2.03545,66.2869


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 11988000
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0255020365529004
    agent_0_total_ball_to_goal_speed_reward_mean: 0.041827296638479915
    agent_0_total_ball_to_goal_speed_reward_min: -0.5793420087520877
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-44-41
  done: false
  episode_len_mean: 56.787234042553195
  episode_media: {}
  episode_reward_max: 1.5453507067882528
  episode_reward_mean: 0.12507882906648277
  episode_reward_min: -1.4071125025992353
  episodes_this_iter: 141
  episodes_total: 43958
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5971491260528564
          entropy_coe

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,375,43241.6,2997000,0.125079,1.54535,-1.40711,56.7872


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12019968
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.2297840034225098
    agent_0_total_ball_to_goal_speed_reward_mean: 0.034848149182403726
    agent_0_total_ball_to_goal_speed_reward_min: -0.5526377264122591
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-46-39
  done: false
  episode_len_mean: 67.3821138211382
  episode_media: {}
  episode_reward_max: 2.3276821328656556
  episode_reward_mean: 0.1615142344594038
  episode_reward_min: -1.0038238880255201
  episodes_this_iter: 123
  episodes_total: 44081
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5994445351362229
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,376,43359.5,3004992,0.161514,2.32768,-1.00382,67.3821


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12051936
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8545779585307396
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09649121687265552
    agent_0_total_ball_to_goal_speed_reward_min: -0.5072782973033835
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-48-35
  done: false
  episode_len_mean: 62.41269841269841
  episode_media: {}
  episode_reward_max: 1.3502353222138925
  episode_reward_mean: 0.2228751223873147
  episode_reward_min: -0.8745785980158005
  episodes_this_iter: 126
  episodes_total: 44207
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.6019621493816376
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,377,43475.9,3012984,0.222875,1.35024,-0.874579,62.4127


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12083904
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0033895615133541
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0809819799284412
    agent_0_total_ball_to_goal_speed_reward_min: -0.47378913468410017
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-50-32
  done: false
  episode_len_mean: 63.66129032258065
  episode_media: {}
  episode_reward_max: 1.3211301364195394
  episode_reward_mean: 0.31688102690428677
  episode_reward_min: -1.0002917686451855
  episodes_this_iter: 124
  episodes_total: 44331
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5985835440158844
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,378,43592.7,3020976,0.316881,1.32113,-1.00029,63.6613


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12115872
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.746312098623675
    agent_0_total_ball_to_goal_speed_reward_mean: 0.056170072364832006
    agent_0_total_ball_to_goal_speed_reward_min: -0.6395297744507245
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-52-29
  done: false
  episode_len_mean: 55.38620689655173
  episode_media: {}
  episode_reward_max: 1.8461564363442187
  episode_reward_mean: 0.1971203604031745
  episode_reward_min: -0.8892706691222134
  episodes_this_iter: 145
  episodes_total: 44476
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5896548454761505
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,379,43709.8,3028968,0.19712,1.84616,-0.889271,55.3862


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12147840
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7900079259331912
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05674469384191456
    agent_0_total_ball_to_goal_speed_reward_min: -0.7742066760974415
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-54-27
  done: false
  episode_len_mean: 57.166666666666664
  episode_media: {}
  episode_reward_max: 1.4446879363375231
  episode_reward_mean: 0.16706058244491725
  episode_reward_min: -1.0462628787498929
  episodes_this_iter: 138
  episodes_total: 44614
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5962168241739273
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,380,43827.3,3036960,0.167061,1.44469,-1.04626,57.1667


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12179808
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9670147853229435
    agent_0_total_ball_to_goal_speed_reward_mean: 0.045782958533458845
    agent_0_total_ball_to_goal_speed_reward_min: -0.7506667417118438
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-56-23
  done: false
  episode_len_mean: 56.61805555555556
  episode_media: {}
  episode_reward_max: 1.5665963037408108
  episode_reward_mean: 0.19750710819097492
  episode_reward_min: -1.2112729060944605
  episodes_this_iter: 144
  episodes_total: 44758
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5924476245641709
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,381,43943.7,3044952,0.197507,1.5666,-1.21127,56.6181


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12211776
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9919870260253831
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04645669057097543
    agent_0_total_ball_to_goal_speed_reward_min: -0.633097650939866
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_02-58-21
  done: false
  episode_len_mean: 61.23076923076923
  episode_media: {}
  episode_reward_max: 1.2880994663568366
  episode_reward_mean: 0.10333042499256748
  episode_reward_min: -0.8912824284408591
  episodes_this_iter: 130
  episodes_total: 44888
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5933144127130509
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,382,44061.3,3052944,0.10333,1.2881,-0.891282,61.2308


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12243744
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1374548952213834
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03192372977070132
    agent_0_total_ball_to_goal_speed_reward_min: -0.8942737918709281
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-00-17
  done: false
  episode_len_mean: 62.512
  episode_media: {}
  episode_reward_max: 1.4283655477359385
  episode_reward_mean: 0.12404478098758619
  episode_reward_min: -1.0264812217052863
  episodes_this_iter: 125
  episodes_total: 45013
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5933795372247695
          entropy_coeff: 0.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,383,44177.5,3060936,0.124045,1.42837,-1.02648,62.512


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12275712
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8290467124395863
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07989886142768647
    agent_0_total_ball_to_goal_speed_reward_min: -0.6780875983727485
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-02-15
  done: false
  episode_len_mean: 61.29545454545455
  episode_media: {}
  episode_reward_max: 1.8593795963445228
  episode_reward_mean: 0.20722195597757456
  episode_reward_min: -1.3743627292922578
  episodes_this_iter: 132
  episodes_total: 45145
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5872220234870911
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,384,44295,3068928,0.207222,1.85938,-1.37436,61.2955


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12307680
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8974225812808144
    agent_0_total_ball_to_goal_speed_reward_mean: 0.12136532308618772
    agent_0_total_ball_to_goal_speed_reward_min: -0.4762251580035002
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-04-12
  done: false
  episode_len_mean: 64.0
  episode_media: {}
  episode_reward_max: 1.2376998830197365
  episode_reward_mean: 0.27127650281565435
  episode_reward_min: -0.8767977053917013
  episodes_this_iter: 126
  episodes_total: 45271
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5959729776382446
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,385,44412.6,3076920,0.271277,1.2377,-0.876798,64


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12339648
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8240828786850102
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0823153768216263
    agent_0_total_ball_to_goal_speed_reward_min: -0.5046142590083122
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-06-09
  done: false
  episode_len_mean: 59.45652173913044
  episode_media: {}
  episode_reward_max: 1.6703043031177156
  episode_reward_mean: 0.19087885112473316
  episode_reward_min: -0.9149347454547552
  episodes_this_iter: 138
  episodes_total: 45409
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5964002842903138
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,386,44529.3,3084912,0.190879,1.6703,-0.914935,59.4565


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12371616
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7973016749400442
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0750093706014376
    agent_0_total_ball_to_goal_speed_reward_min: -0.7977815952584102
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-08-06
  done: false
  episode_len_mean: 62.3671875
  episode_media: {}
  episode_reward_max: 1.326051925664157
  episode_reward_mean: 0.20121316389149918
  episode_reward_min: -0.9926390361489092
  episodes_this_iter: 128
  episodes_total: 45537
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.597462886095047
          entropy_coeff: 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,387,44646.2,3092904,0.201213,1.32605,-0.992639,62.3672


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12403584
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.776231670385728
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0466982757711529
    agent_0_total_ball_to_goal_speed_reward_min: -0.7928900239906035
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-10-02
  done: false
  episode_len_mean: 63.203125
  episode_media: {}
  episode_reward_max: 1.3134870501322915
  episode_reward_mean: 0.1174027166008314
  episode_reward_min: -0.8560671339732406
  episodes_this_iter: 128
  episodes_total: 45665
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5861807870864868
          entropy_coeff: 0.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,388,44762.4,3100896,0.117403,1.31349,-0.856067,63.2031


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12435552
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0392870260773999
    agent_0_total_ball_to_goal_speed_reward_mean: 0.051342370275441716
    agent_0_total_ball_to_goal_speed_reward_min: -0.5091073703247363
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-11-59
  done: false
  episode_len_mean: 60.653543307086615
  episode_media: {}
  episode_reward_max: 1.6944375158747804
  episode_reward_mean: 0.17036889289148857
  episode_reward_min: -0.9859296710475405
  episodes_this_iter: 127
  episodes_total: 45792
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5965709503889084
          entropy_coe

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,389,44878.8,3108888,0.170369,1.69444,-0.98593,60.6535


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12467520
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.800479975087514
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0953523022136024
    agent_0_total_ball_to_goal_speed_reward_min: -0.6970773867095333
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-14-00
  done: false
  episode_len_mean: 63.42741935483871
  episode_media: {}
  episode_reward_max: 1.3398380282702451
  episode_reward_mean: 0.2433561141058095
  episode_reward_min: -1.8364868970495776
  episodes_this_iter: 124
  episodes_total: 45916
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5953160762786865
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,390,45000.3,3116880,0.243356,1.33984,-1.83649,63.4274


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12499488
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6587397698714255
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07962032389996287
    agent_0_total_ball_to_goal_speed_reward_min: -0.5052307389518985
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-16-01
  done: false
  episode_len_mean: 62.53787878787879
  episode_media: {}
  episode_reward_max: 1.33539374392887
  episode_reward_mean: 0.2306240070258404
  episode_reward_min: -1.006653798850953
  episodes_this_iter: 132
  episodes_total: 46048
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5968020609617233
          entropy_coeff: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,391,45121.1,3124872,0.230624,1.33539,-1.00665,62.5379


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12531456
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8298397148029496
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05196194440037873
    agent_0_total_ball_to_goal_speed_reward_min: -0.7114415940429929
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-18-10
  done: false
  episode_len_mean: 56.391304347826086
  episode_media: {}
  episode_reward_max: 1.5029099090714242
  episode_reward_mean: 0.27236959290184853
  episode_reward_min: -0.8354018678589943
  episodes_this_iter: 138
  episodes_total: 46186
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5892692424058914
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,392,45250.2,3132864,0.27237,1.50291,-0.835402,56.3913


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12563424
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7192950972515888
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03599467747932321
    agent_0_total_ball_to_goal_speed_reward_min: -0.6427842084372443
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-20-23
  done: false
  episode_len_mean: 58.62595419847328
  episode_media: {}
  episode_reward_max: 1.3621834800965131
  episode_reward_mean: 0.13484815141533396
  episode_reward_min: -1.2158925185687388
  episodes_this_iter: 131
  episodes_total: 46317
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.602042237997055
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,393,45382.7,3140856,0.134848,1.36218,-1.21589,58.626


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12595392
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9597240949372857
    agent_0_total_ball_to_goal_speed_reward_mean: 0.055510356953681596
    agent_0_total_ball_to_goal_speed_reward_min: -1.0123134932114133
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-22-25
  done: false
  episode_len_mean: 67.99186991869918
  episode_media: {}
  episode_reward_max: 1.2714782504856958
  episode_reward_mean: 0.10660500533214862
  episode_reward_min: -1.418377539284193
  episodes_this_iter: 123
  episodes_total: 46440
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5864409621953964
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,394,45504.8,3148848,0.106605,1.27148,-1.41838,67.9919


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12627360
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.889752025082431
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07366983585135475
    agent_0_total_ball_to_goal_speed_reward_min: -0.45070457091199023
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-24-30
  done: false
  episode_len_mean: 55.94405594405595
  episode_media: {}
  episode_reward_max: 1.4224289561980852
  episode_reward_mean: 0.19047186534882557
  episode_reward_min: -0.799440738896379
  episodes_this_iter: 143
  episodes_total: 46583
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5916836103200912
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,395,45629,3156840,0.190472,1.42243,-0.799441,55.9441


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12659328
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9455707835074896
    agent_0_total_ball_to_goal_speed_reward_mean: 0.1090740321655948
    agent_0_total_ball_to_goal_speed_reward_min: -0.5848301585000865
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-26-30
  done: false
  episode_len_mean: 60.42962962962963
  episode_media: {}
  episode_reward_max: 1.4715424852817325
  episode_reward_mean: 0.22223776741058204
  episode_reward_min: -1.1508136901053518
  episodes_this_iter: 135
  episodes_total: 46718
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5944642218351364
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,396,45749,3164832,0.222238,1.47154,-1.15081,60.4296


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12691296
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0183537422094844
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10397238101058892
    agent_0_total_ball_to_goal_speed_reward_min: -0.6258855901729844
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-28-29
  done: false
  episode_len_mean: 69.37837837837837
  episode_media: {}
  episode_reward_max: 1.5243606768759876
  episode_reward_mean: 0.22088763356152805
  episode_reward_min: -0.7327301376529234
  episodes_this_iter: 111
  episodes_total: 46829
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5922257339954377
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,397,45868.7,3172824,0.220888,1.52436,-0.73273,69.3784


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12723264
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.135558881077347
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07252497764276221
    agent_0_total_ball_to_goal_speed_reward_min: -0.6325551448313617
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-30-24
  done: false
  episode_len_mean: 65.04878048780488
  episode_media: {}
  episode_reward_max: 1.7627277042958633
  episode_reward_mean: 0.20236702105078722
  episode_reward_min: -0.8770606642662484
  episodes_this_iter: 123
  episodes_total: 46952
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5905669202804565
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,398,45982.8,3180816,0.202367,1.76273,-0.877061,65.0488


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12755232
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9162057495634616
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06289730988553463
    agent_0_total_ball_to_goal_speed_reward_min: -0.48104057291577784
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-32-23
  done: false
  episode_len_mean: 62.208
  episode_media: {}
  episode_reward_max: 1.5622274395266784
  episode_reward_mean: 0.15993629002331386
  episode_reward_min: -1.0796322005977745
  episodes_this_iter: 125
  episodes_total: 47077
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5901471191644668
          entropy_coeff: 0.0
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,399,46102.4,3188808,0.159936,1.56223,-1.07963,62.208


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12787200
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9222671155776289
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09135639169996844
    agent_0_total_ball_to_goal_speed_reward_min: -0.6232481201464198
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-34-23
  done: false
  episode_len_mean: 64.04580152671755
  episode_media: {}
  episode_reward_max: 1.4944832371385228
  episode_reward_mean: 0.24758203897910663
  episode_reward_min: -0.9033512705579445
  episodes_this_iter: 131
  episodes_total: 47208
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5824349417686462
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,400,46222.4,3196800,0.247582,1.49448,-0.903351,64.0458


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12819168
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1280289035524083
    agent_0_total_ball_to_goal_speed_reward_mean: 0.12140165286295487
    agent_0_total_ball_to_goal_speed_reward_min: -0.44915752582088264
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-36-21
  done: false
  episode_len_mean: 59.92481203007519
  episode_media: {}
  episode_reward_max: 1.4759902266712794
  episode_reward_mean: 0.220305016430832
  episode_reward_min: -1.2292305727368475
  episodes_this_iter: 133
  episodes_total: 47341
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5911720932722092
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,401,46340.2,3204792,0.220305,1.47599,-1.22923,59.9248


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12851136
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9629432537921425
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08675054925264498
    agent_0_total_ball_to_goal_speed_reward_min: -0.6155805919814572
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-38-18
  done: false
  episode_len_mean: 59.51127819548872
  episode_media: {}
  episode_reward_max: 1.3969573889199196
  episode_reward_mean: 0.24345009974258927
  episode_reward_min: -1.0063725018852612
  episodes_this_iter: 133
  episodes_total: 47474
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5877383284568787
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,402,46456.2,3212784,0.24345,1.39696,-1.00637,59.5113


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12883104
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8820560075440562
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06661893618056038
    agent_0_total_ball_to_goal_speed_reward_min: -0.49492746839380636
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-40-15
  done: false
  episode_len_mean: 61.3046875
  episode_media: {}
  episode_reward_max: 1.9079974980444314
  episode_reward_mean: 0.19493211599931792
  episode_reward_min: -1.1179021316824516
  episodes_this_iter: 128
  episodes_total: 47602
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5908430144786835
          entropy_coeff: 0.0


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,403,46573.4,3220776,0.194932,1.908,-1.1179,61.3047


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12915072
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8441510221488657
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10557935644409593
    agent_0_total_ball_to_goal_speed_reward_min: -0.5551559158662147
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-42-12
  done: false
  episode_len_mean: 61.40909090909091
  episode_media: {}
  episode_reward_max: 1.861172708102507
  episode_reward_mean: 0.17371778355706258
  episode_reward_min: -0.9830538320639284
  episodes_this_iter: 132
  episodes_total: 47734
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5829516659975051
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,404,46690.7,3228768,0.173718,1.86117,-0.983054,61.4091


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12947040
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8831599705337544
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08868642813077274
    agent_0_total_ball_to_goal_speed_reward_min: -0.6047177279423235
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-44-08
  done: false
  episode_len_mean: 70.30357142857143
  episode_media: {}
  episode_reward_max: 1.8250858508464036
  episode_reward_mean: 0.1366361292220735
  episode_reward_min: -0.8041066949753057
  episodes_this_iter: 112
  episodes_total: 47846
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5907198388576508
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,405,46806.9,3236760,0.136636,1.82509,-0.804107,70.3036


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 12979008
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0163452407522124
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07004122500856765
    agent_0_total_ball_to_goal_speed_reward_min: -0.595286950794921
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-46-05
  done: false
  episode_len_mean: 63.96875
  episode_media: {}
  episode_reward_max: 1.892617546284545
  episode_reward_mean: 0.1999697354672455
  episode_reward_min: -1.0351315656471325
  episodes_this_iter: 128
  episodes_total: 47974
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5879677263498306
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,406,46923.5,3244752,0.19997,1.89262,-1.03513,63.9688


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13010976
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9088893818803551
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09205549353927449
    agent_0_total_ball_to_goal_speed_reward_min: -0.5279908834027212
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-48-05
  done: false
  episode_len_mean: 64.984
  episode_media: {}
  episode_reward_max: 1.538388122462731
  episode_reward_mean: 0.2922009231332278
  episode_reward_min: -1.0739888316919974
  episodes_this_iter: 125
  episodes_total: 48099
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5860138474702835
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,407,47043.1,3252744,0.292201,1.53839,-1.07399,64.984


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13042944
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9486237682092771
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03894676158474162
    agent_0_total_ball_to_goal_speed_reward_min: -0.7432082921446161
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-50-04
  done: false
  episode_len_mean: 66.23275862068965
  episode_media: {}
  episode_reward_max: 1.132533759207297
  episode_reward_mean: 0.11941345904294348
  episode_reward_min: -1.1019631738721065
  episodes_this_iter: 116
  episodes_total: 48215
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5881211103200913
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,408,47162.6,3260736,0.119413,1.13253,-1.10196,66.2328


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13074912
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9643387742157218
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09228450738182623
    agent_0_total_ball_to_goal_speed_reward_min: -1.0444660946330306
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-52-03
  done: false
  episode_len_mean: 75.1651376146789
  episode_media: {}
  episode_reward_max: 1.6007576867734192
  episode_reward_mean: 0.223663825063673
  episode_reward_min: -0.9997309515223296
  episodes_this_iter: 109
  episodes_total: 48324
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5853959959745407
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,409,47281.6,3268728,0.223664,1.60076,-0.999731,75.1651


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13106880
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9838463394312538
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09233277627879073
    agent_0_total_ball_to_goal_speed_reward_min: -0.7818387410372314
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-54-01
  done: false
  episode_len_mean: 66.25
  episode_media: {}
  episode_reward_max: 1.808061586123527
  episode_reward_mean: 0.2532964531397638
  episode_reward_min: -1.1029965130159933
  episodes_this_iter: 116
  episodes_total: 48440
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5867583388090134
          entropy_coeff: 0.0
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,410,47399,3276720,0.253296,1.80806,-1.103,66.25


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13138848
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9011131826651981
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0651099082572486
    agent_0_total_ball_to_goal_speed_reward_min: -0.5152090109015274
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-56-00
  done: false
  episode_len_mean: 71.75652173913043
  episode_media: {}
  episode_reward_max: 1.4747628166767894
  episode_reward_mean: 0.17185279824622085
  episode_reward_min: -1.0438393968878623
  episodes_this_iter: 115
  episodes_total: 48555
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5903607275485993
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,411,47517.8,3284712,0.171853,1.47476,-1.04384,71.7565


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13170816
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9599824469111728
    agent_0_total_ball_to_goal_speed_reward_mean: 0.11353740046365197
    agent_0_total_ball_to_goal_speed_reward_min: -0.5830377510003164
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-57-52
  done: false
  episode_len_mean: 59.8796992481203
  episode_media: {}
  episode_reward_max: 1.4550297268196344
  episode_reward_mean: 0.21630260930152703
  episode_reward_min: -0.8736789792731012
  episodes_this_iter: 133
  episodes_total: 48688
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5891875569820404
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,412,47630.3,3292704,0.216303,1.45503,-0.873679,59.8797


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13202784
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7099758678878698
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06987424244144243
    agent_0_total_ball_to_goal_speed_reward_min: -0.4441897109688018
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_03-59-49
  done: false
  episode_len_mean: 62.92063492063492
  episode_media: {}
  episode_reward_max: 1.305756022576829
  episode_reward_mean: 0.1619705744651903
  episode_reward_min: -1.125446630937585
  episodes_this_iter: 126
  episodes_total: 48814
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5938545684814454
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,413,47747.3,3300696,0.161971,1.30576,-1.12545,62.9206


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13234752
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9581527174376353
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07849005335254404
    agent_0_total_ball_to_goal_speed_reward_min: -0.5613514549370314
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-01-47
  done: false
  episode_len_mean: 58.52142857142857
  episode_media: {}
  episode_reward_max: 1.6848756104704739
  episode_reward_mean: 0.18615699456866347
  episode_reward_min: -0.9016339528916086
  episodes_this_iter: 140
  episodes_total: 48954
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5890415818691254
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,414,47864.7,3308688,0.186157,1.68488,-0.901634,58.5214


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13266720
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9635000735195391
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10411041452983051
    agent_0_total_ball_to_goal_speed_reward_min: -0.451513315413688
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-03-44
  done: false
  episode_len_mean: 65.35593220338983
  episode_media: {}
  episode_reward_max: 1.810560170817919
  episode_reward_mean: 0.2634227879997556
  episode_reward_min: -1.017781781836817
  episodes_this_iter: 118
  episodes_total: 49072
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5891856858730317
          entropy_coeff: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,415,47982.2,3316680,0.263423,1.81056,-1.01778,65.3559


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13298688
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8996457906603073
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07552807916001476
    agent_0_total_ball_to_goal_speed_reward_min: -0.42628590565103575
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-05-42
  done: false
  episode_len_mean: 65.368
  episode_media: {}
  episode_reward_max: 1.5208241900146555
  episode_reward_mean: 0.2569750617057212
  episode_reward_min: -0.7142408208049407
  episodes_this_iter: 125
  episodes_total: 49197
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5889000535011292
          entropy_coeff: 0.0
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,416,48099.6,3324672,0.256975,1.52082,-0.714241,65.368


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13330656
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.056815009042498
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08751870305718286
    agent_0_total_ball_to_goal_speed_reward_min: -0.6177832092791604
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-07-38
  done: false
  episode_len_mean: 72.23636363636363
  episode_media: {}
  episode_reward_max: 1.3694068093002125
  episode_reward_mean: 0.2291152956537758
  episode_reward_min: -1.1223684000443788
  episodes_this_iter: 110
  episodes_total: 49307
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.587929795384407
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,417,48216.1,3332664,0.229115,1.36941,-1.12237,72.2364


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13362624
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9215309119710863
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09115943221448339
    agent_0_total_ball_to_goal_speed_reward_min: -0.8493464216502125
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-09-35
  done: false
  episode_len_mean: 68.66666666666667
  episode_media: {}
  episode_reward_max: 1.5715662452471935
  episode_reward_mean: 0.23425572993009808
  episode_reward_min: -1.698328074014614
  episodes_this_iter: 117
  episodes_total: 49424
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5847705199718475
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,418,48332.3,3340656,0.234256,1.57157,-1.69833,68.6667


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13394592
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7354240937161609
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09172622759987507
    agent_0_total_ball_to_goal_speed_reward_min: -0.5320813015522634
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-11-31
  done: false
  episode_len_mean: 62.43307086614173
  episode_media: {}
  episode_reward_max: 1.5637649062201069
  episode_reward_mean: 0.2106993840468314
  episode_reward_min: -1.2246623995145904
  episodes_this_iter: 127
  episodes_total: 49551
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5872124437093734
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,419,48449,3348648,0.210699,1.56376,-1.22466,62.4331


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13426560
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6008474554864076
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05906561237555072
    agent_0_total_ball_to_goal_speed_reward_min: -0.5112636847342827
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-13-34
  done: false
  episode_len_mean: 61.323076923076925
  episode_media: {}
  episode_reward_max: 1.7245816933628633
  episode_reward_mean: 0.13546699318565225
  episode_reward_min: -1.2397878357319132
  episodes_this_iter: 130
  episodes_total: 49681
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5934468491077423
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,420,48571.3,3356640,0.135467,1.72458,-1.23979,61.3231


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13458528
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8333667329056972
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08113810652754325
    agent_0_total_ball_to_goal_speed_reward_min: -0.6468854238628621
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-15-32
  done: false
  episode_len_mean: 68.81666666666666
  episode_media: {}
  episode_reward_max: 2.114836408082792
  episode_reward_mean: 0.16652588891903236
  episode_reward_min: -1.6579873474392413
  episodes_this_iter: 120
  episodes_total: 49801
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5882452795505524
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,421,48689.3,3364632,0.166526,2.11484,-1.65799,68.8167


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13490496
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0672293821520824
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09603432299946457
    agent_0_total_ball_to_goal_speed_reward_min: -0.8443533332292249
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-17-29
  done: false
  episode_len_mean: 58.04477611940298
  episode_media: {}
  episode_reward_max: 1.2685854564377443
  episode_reward_mean: 0.2296680960280834
  episode_reward_min: -0.6868463785741412
  episodes_this_iter: 134
  episodes_total: 49935
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5821974093914032
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,422,48806.1,3372624,0.229668,1.26859,-0.686846,58.0448


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13522464
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.2006444614428935
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07432175454720867
    agent_0_total_ball_to_goal_speed_reward_min: -0.49577272591708355
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-19-29
  done: false
  episode_len_mean: 59.97709923664122
  episode_media: {}
  episode_reward_max: 1.4543757438826161
  episode_reward_mean: 0.14180635610045034
  episode_reward_min: -0.6585398528565929
  episodes_this_iter: 131
  episodes_total: 50066
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5853381289243698
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,423,48926.7,3380616,0.141806,1.45438,-0.65854,59.9771


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13554432
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8869273330961824
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07270662641326896
    agent_0_total_ball_to_goal_speed_reward_min: -0.7644652485767282
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-21-24
  done: false
  episode_len_mean: 68.60330578512396
  episode_media: {}
  episode_reward_max: 1.297333201408842
  episode_reward_mean: 0.17772658896242408
  episode_reward_min: -0.9011718166339838
  episodes_this_iter: 121
  episodes_total: 50187
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5813515447378158
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,424,49041.2,3388608,0.177727,1.29733,-0.901172,68.6033


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13586400
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.2046495480350143
    agent_0_total_ball_to_goal_speed_reward_mean: 0.009374097840517813
    agent_0_total_ball_to_goal_speed_reward_min: -0.6728150777379319
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-23-18
  done: false
  episode_len_mean: 60.07936507936508
  episode_media: {}
  episode_reward_max: 2.140543309706386
  episode_reward_mean: 0.11184288518954837
  episode_reward_min: -1.2334457558516583
  episodes_this_iter: 126
  episodes_total: 50313
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.584447923541069
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,425,49155.3,3396600,0.111843,2.14054,-1.23345,60.0794


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13618368
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7807058028557583
    agent_0_total_ball_to_goal_speed_reward_mean: 0.006061915585290296
    agent_0_total_ball_to_goal_speed_reward_min: -0.7326701329242186
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-25-12
  done: false
  episode_len_mean: 64.95275590551181
  episode_media: {}
  episode_reward_max: 1.4140680374654848
  episode_reward_mean: 0.09543897496366315
  episode_reward_min: -1.1164045436131302
  episodes_this_iter: 127
  episodes_total: 50440
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5829163395166397
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,426,49268.8,3404592,0.095439,1.41407,-1.1164,64.9528


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13650336
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7285321318424883
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0868174485656152
    agent_0_total_ball_to_goal_speed_reward_min: -0.5533474936086513
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-27-06
  done: false
  episode_len_mean: 66.66666666666667
  episode_media: {}
  episode_reward_max: 1.671643829529697
  episode_reward_mean: 0.22242432023388445
  episode_reward_min: -1.27056780436636
  episodes_this_iter: 120
  episodes_total: 50560
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5837785609960556
          entropy_coeff: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,427,49382.8,3412584,0.222424,1.67164,-1.27057,66.6667


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13682304
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.778186993143789
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09834093532631266
    agent_0_total_ball_to_goal_speed_reward_min: -0.36099728551961835
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-28-59
  done: false
  episode_len_mean: 64.968
  episode_media: {}
  episode_reward_max: 1.362759973757866
  episode_reward_mean: 0.18367177533639797
  episode_reward_min: -1.447048961839369
  episodes_this_iter: 125
  episodes_total: 50685
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5842796434164047
          entropy_coeff: 0.0
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,428,49496.3,3420576,0.183672,1.36276,-1.44705,64.968


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13714272
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9425093160329095
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07235610892462253
    agent_0_total_ball_to_goal_speed_reward_min: -0.5558174697887308
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-30-54
  done: false
  episode_len_mean: 68.62280701754386
  episode_media: {}
  episode_reward_max: 1.3907666873023605
  episode_reward_mean: 0.158728423402932
  episode_reward_min: -0.8573527935715699
  episodes_this_iter: 114
  episodes_total: 50799
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5837393226623535
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,429,49610.7,3428568,0.158728,1.39077,-0.857353,68.6228


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13746240
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0232301103787182
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08366110140740401
    agent_0_total_ball_to_goal_speed_reward_min: -0.3716562792914916
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-32-48
  done: false
  episode_len_mean: 62.86153846153846
  episode_media: {}
  episode_reward_max: 1.6898790991521737
  episode_reward_mean: 0.22411050195236454
  episode_reward_min: -1.7085008547318445
  episodes_this_iter: 130
  episodes_total: 50929
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.581789813041687
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,430,49724.5,3436560,0.224111,1.68988,-1.7085,62.8615


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13778208
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6917122612756504
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0727566776015737
    agent_0_total_ball_to_goal_speed_reward_min: -1.054219462604777
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-34-42
  done: false
  episode_len_mean: 66.27586206896552
  episode_media: {}
  episode_reward_max: 1.2771396595691824
  episode_reward_mean: 0.17587403033437896
  episode_reward_min: -1.2258431282448417
  episodes_this_iter: 116
  episodes_total: 51045
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5817527544498443
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,431,49838.5,3444552,0.175874,1.27714,-1.22584,66.2759


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13810176
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9933481108354825
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04751082576154472
    agent_0_total_ball_to_goal_speed_reward_min: -0.5884186280118189
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-36-37
  done: false
  episode_len_mean: 56.147887323943664
  episode_media: {}
  episode_reward_max: 1.5351600489225734
  episode_reward_mean: 0.1592993730055849
  episode_reward_min: -1.0531667286337019
  episodes_this_iter: 142
  episodes_total: 51187
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5795521492958069
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,432,49954.2,3452544,0.159299,1.53516,-1.05317,56.1479


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13842144
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.842625293443657
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05736201419450754
    agent_0_total_ball_to_goal_speed_reward_min: -0.8266031982323889
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-38-34
  done: false
  episode_len_mean: 60.0863309352518
  episode_media: {}
  episode_reward_max: 1.283985533106864
  episode_reward_mean: 0.05991506211962911
  episode_reward_min: -1.0451527700442287
  episodes_this_iter: 139
  episodes_total: 51326
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5879793571233749
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,433,50070.5,3460536,0.0599151,1.28399,-1.04515,60.0863


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13874112
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8663022405475768
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09043733970211203
    agent_0_total_ball_to_goal_speed_reward_min: -0.5112636194817056
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-40-31
  done: false
  episode_len_mean: 72.79439252336448
  episode_media: {}
  episode_reward_max: 1.7193860153837655
  episode_reward_mean: 0.26038129062314896
  episode_reward_min: -1.4216460223859546
  episodes_this_iter: 107
  episodes_total: 51433
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5802374676465988
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,434,50187.3,3468528,0.260381,1.71939,-1.42165,72.7944


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13906080
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6991493531101869
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07514536659024666
    agent_0_total_ball_to_goal_speed_reward_min: -0.4857504398754306
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-42-29
  done: false
  episode_len_mean: 59.11594202898551
  episode_media: {}
  episode_reward_max: 1.651812173844308
  episode_reward_mean: 0.2026317121681567
  episode_reward_min: -0.8017845027469366
  episodes_this_iter: 138
  episodes_total: 51571
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5849000757932663
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,435,50305.3,3476520,0.202632,1.65181,-0.801785,59.1159


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13938048
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.66939357886879
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05725707976847759
    agent_0_total_ball_to_goal_speed_reward_min: -0.41969601896723774
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-44-25
  done: false
  episode_len_mean: 64.71544715447155
  episode_media: {}
  episode_reward_max: 1.919311974107331
  episode_reward_mean: 0.20182859049278395
  episode_reward_min: -1.1037252428300692
  episodes_this_iter: 123
  episodes_total: 51694
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5877677446603775
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,436,50421.7,3484512,0.201829,1.91931,-1.10373,64.7154


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 13970016
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8261964319513646
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07219941880414509
    agent_0_total_ball_to_goal_speed_reward_min: -0.5028953964245775
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-46-22
  done: false
  episode_len_mean: 59.64393939393939
  episode_media: {}
  episode_reward_max: 1.2757409274597076
  episode_reward_mean: 0.18245861210406958
  episode_reward_min: -1.0773500487853054
  episodes_this_iter: 132
  episodes_total: 51826
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5842102113962173
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,437,50537.9,3492504,0.182459,1.27574,-1.07735,59.6439


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14001984
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8706868359654502
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07239745109532247
    agent_0_total_ball_to_goal_speed_reward_min: -0.5762043424062347
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-48-18
  done: false
  episode_len_mean: 65.20967741935483
  episode_media: {}
  episode_reward_max: 1.4586709407186573
  episode_reward_mean: 0.14328988182926905
  episode_reward_min: -1.1893167926984773
  episodes_this_iter: 124
  episodes_total: 51950
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.581071645617485
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,438,50654.4,3500496,0.14329,1.45867,-1.18932,65.2097


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14033952
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.7961882832753708
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07404261456764052
    agent_0_total_ball_to_goal_speed_reward_min: -0.8191883078608337
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-50-14
  done: false
  episode_len_mean: 64.05555555555556
  episode_media: {}
  episode_reward_max: 1.3611757752978928
  episode_reward_mean: 0.20458500491566456
  episode_reward_min: -1.0571020733478176
  episodes_this_iter: 126
  episodes_total: 52076
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5839290236234664
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,439,50770,3508488,0.204585,1.36118,-1.0571,64.0556


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14065920
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.234326889924167
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0806185883719284
    agent_0_total_ball_to_goal_speed_reward_min: -0.7008128255335118
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-52-11
  done: false
  episode_len_mean: 57.94074074074074
  episode_media: {}
  episode_reward_max: 1.457233103595799
  episode_reward_mean: 0.18522320643419077
  episode_reward_min: -0.8095689465472553
  episodes_this_iter: 135
  episodes_total: 52211
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5781569967269897
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,440,50886.9,3516480,0.185223,1.45723,-0.809569,57.9407


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14097888
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.879469251425583
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06788717910818201
    agent_0_total_ball_to_goal_speed_reward_min: -0.5598904126797671
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-54-07
  done: false
  episode_len_mean: 59.953125
  episode_media: {}
  episode_reward_max: 1.538609750679301
  episode_reward_mean: 0.127961035992399
  episode_reward_min: -1.1398130170936778
  episodes_this_iter: 128
  episodes_total: 52339
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5808554091453553
          entropy_coeff: 0.0
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,441,51003.3,3524472,0.127961,1.53861,-1.13981,59.9531


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14129856
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8237576824813458
    agent_0_total_ball_to_goal_speed_reward_mean: 0.008879772356791469
    agent_0_total_ball_to_goal_speed_reward_min: -0.5352178259873563
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-56-04
  done: false
  episode_len_mean: 63.71653543307087
  episode_media: {}
  episode_reward_max: 1.1910554080904037
  episode_reward_mean: 0.06646192124485335
  episode_reward_min: -1.2497332400933603
  episodes_this_iter: 127
  episodes_total: 52466
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5840917810201645
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,442,51119.7,3532464,0.0664619,1.19106,-1.24973,63.7165


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14161824
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9849535167913345
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07857635253943016
    agent_0_total_ball_to_goal_speed_reward_min: -0.6746352763558898
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-58-00
  done: false
  episode_len_mean: 63.48461538461538
  episode_media: {}
  episode_reward_max: 1.2630936286507475
  episode_reward_mean: 0.1280945994422293
  episode_reward_min: -1.1533937095905515
  episodes_this_iter: 130
  episodes_total: 52596
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5785082548856735
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,443,51235.9,3540456,0.128095,1.26309,-1.15339,63.4846


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14193792
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.9770517441540005
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08669769610416818
    agent_0_total_ball_to_goal_speed_reward_min: -0.5919131031580547
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_04-59-56
  done: false
  episode_len_mean: 65.77310924369748
  episode_media: {}
  episode_reward_max: 1.4009680630341905
  episode_reward_mean: 0.15487112062377933
  episode_reward_min: -1.153806903087569
  episodes_this_iter: 119
  episodes_total: 52715
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5857247617244721
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,444,51351.5,3548448,0.154871,1.40097,-1.15381,65.7731


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14225760
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.012610861861842
    agent_0_total_ball_to_goal_speed_reward_mean: 0.13397976284011767
    agent_0_total_ball_to_goal_speed_reward_min: -0.4806460759747835
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-01-53
  done: false
  episode_len_mean: 58.87323943661972
  episode_media: {}
  episode_reward_max: 1.539632728768624
  episode_reward_mean: 0.18494898038697746
  episode_reward_min: -1.1201557927587245
  episodes_this_iter: 142
  episodes_total: 52857
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5762273287773132
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,445,51468.6,3556440,0.184949,1.53963,-1.12016,58.8732


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14257728
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.6821937900244369
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04251557576884394
    agent_0_total_ball_to_goal_speed_reward_min: -0.5137871947829924
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-03-49
  done: false
  episode_len_mean: 59.91338582677165
  episode_media: {}
  episode_reward_max: 1.320385343046893
  episode_reward_mean: 0.13078573161708518
  episode_reward_min: -0.7056899561378622
  episodes_this_iter: 127
  episodes_total: 52984
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5807379972934723
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,446,51585,3564432,0.130786,1.32039,-0.70569,59.9134


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14289696
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.950159334996598
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10685365556978109
    agent_0_total_ball_to_goal_speed_reward_min: -0.5320025824477144
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-05-45
  done: false
  episode_len_mean: 66.8099173553719
  episode_media: {}
  episode_reward_max: 1.737610721494314
  episode_reward_mean: 0.20074492268423044
  episode_reward_min: -0.8811104896220399
  episodes_this_iter: 121
  episodes_total: 53105
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5825502835512161
          entropy_coeff: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,447,51700.4,3572424,0.200745,1.73761,-0.88111,66.8099


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14321664
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.2208838156397885
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10498212841286371
    agent_0_total_ball_to_goal_speed_reward_min: -0.5714286726319286
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-07-42
  done: false
  episode_len_mean: 65.68333333333334
  episode_media: {}
  episode_reward_max: 1.6582699508414929
  episode_reward_mean: 0.20310042397678005
  episode_reward_min: -1.157404621870032
  episodes_this_iter: 120
  episodes_total: 53225
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5792482249736786
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,448,51817.3,3580416,0.2031,1.65827,-1.1574,65.6833


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14353632
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8208337307672462
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04655434024498716
    agent_0_total_ball_to_goal_speed_reward_min: -0.8630243202201007
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-09-38
  done: false
  episode_len_mean: 65.74166666666666
  episode_media: {}
  episode_reward_max: 1.875381450877865
  episode_reward_mean: 0.19403262424875095
  episode_reward_min: -0.9910122110870321
  episodes_this_iter: 120
  episodes_total: 53345
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5861293475627899
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,449,51933.2,3588408,0.194033,1.87538,-0.991012,65.7417


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14385600
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8697856954745454
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09206304789036969
    agent_0_total_ball_to_goal_speed_reward_min: -0.5104416741960461
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-11-33
  done: false
  episode_len_mean: 69.67768595041322
  episode_media: {}
  episode_reward_max: 2.153467818852701
  episode_reward_mean: 0.19621440979477986
  episode_reward_min: -0.8944010912305731
  episodes_this_iter: 121
  episodes_total: 53466
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5764200712442398
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,450,52048.8,3596400,0.196214,2.15347,-0.894401,69.6777


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14417568
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8329738103169475
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05942408352356331
    agent_0_total_ball_to_goal_speed_reward_min: -0.5091737150758814
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-13-31
  done: false
  episode_len_mean: 60.55118110236221
  episode_media: {}
  episode_reward_max: 2.0971870869571543
  episode_reward_mean: 0.16181048477641513
  episode_reward_min: -1.2117670325421663
  episodes_this_iter: 127
  episodes_total: 53593
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5832677899599076
          entropy_coeff

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,451,52166,3604392,0.16181,2.09719,-1.21177,60.5512


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14449536
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.71416835402874
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0637802874356276
    agent_0_total_ball_to_goal_speed_reward_min: -0.7007929467297574
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-15-26
  done: false
  episode_len_mean: 55.205479452054796
  episode_media: {}
  episode_reward_max: 1.4807289498515817
  episode_reward_mean: 0.09776655286234114
  episode_reward_min: -1.7704307044258831
  episodes_this_iter: 146
  episodes_total: 53739
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5794927879571915
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,452,52281,3612384,0.0977666,1.48073,-1.77043,55.2055


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14481504
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0241859037279524
    agent_0_total_ball_to_goal_speed_reward_mean: 0.056016255819146904
    agent_0_total_ball_to_goal_speed_reward_min: -0.5446875598507767
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-17-18
  done: false
  episode_len_mean: 70.62931034482759
  episode_media: {}
  episode_reward_max: 1.5600240365476017
  episode_reward_mean: 0.21777275641408228
  episode_reward_min: -1.0935767271308308
  episodes_this_iter: 116
  episodes_total: 53855
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5791649090051652
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,453,52393.7,3620376,0.217773,1.56002,-1.09358,70.6293


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14513472
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.1395588280978413
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10797454992075042
    agent_0_total_ball_to_goal_speed_reward_min: -0.6070691133447004
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-19-11
  done: false
  episode_len_mean: 59.0763358778626
  episode_media: {}
  episode_reward_max: 2.1765598723260227
  episode_reward_mean: 0.25360639919697614
  episode_reward_min: -0.867334020760703
  episodes_this_iter: 131
  episodes_total: 53986
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5788749415874481
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,454,52505.9,3628368,0.253606,2.17656,-0.867334,59.0763


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14545440
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.885450596043589
    agent_0_total_ball_to_goal_speed_reward_mean: 0.05611894697749828
    agent_0_total_ball_to_goal_speed_reward_min: -0.5422725173014733
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-21-03
  done: false
  episode_len_mean: 62.5859375
  episode_media: {}
  episode_reward_max: 1.1374439076316056
  episode_reward_mean: 0.14110135939562785
  episode_reward_min: -0.9033138691651248
  episodes_this_iter: 128
  episodes_total: 54114
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5799011744260788
          entropy_coeff: 0.0
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,455,52618.4,3636360,0.141101,1.13744,-0.903314,62.5859


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14577408
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.8924077901397355
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06969960823939574
    agent_0_total_ball_to_goal_speed_reward_min: -0.588918401274288
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-22-56
  done: false
  episode_len_mean: 62.30708661417323
  episode_media: {}
  episode_reward_max: 1.8188064401993222
  episode_reward_mean: 0.17477461405606762
  episode_reward_min: -0.7748396505296755
  episodes_this_iter: 127
  episodes_total: 54241
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5823779431581497
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,456,52731.3,3644352,0.174775,1.81881,-0.77484,62.3071


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14609376
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0966103653405108
    agent_0_total_ball_to_goal_speed_reward_mean: 0.016415087553672016
    agent_0_total_ball_to_goal_speed_reward_min: -0.9035109045051111
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-24-48
  done: false
  episode_len_mean: 77.47115384615384
  episode_media: {}
  episode_reward_max: 1.4222043048395707
  episode_reward_mean: 0.09731738492504106
  episode_reward_min: -1.3679633064458714
  episodes_this_iter: 104
  episodes_total: 54345
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5833555316925049
          entropy_coef

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,457,52843.2,3652344,0.0973174,1.4222,-1.36796,77.4712


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14641344
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.0211751196334595
    agent_0_total_ball_to_goal_speed_reward_mean: 0.1023593638766771
    agent_0_total_ball_to_goal_speed_reward_min: -0.42541221929249745
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-26-41
  done: false
  episode_len_mean: 61.140625
  episode_media: {}
  episode_reward_max: 1.379246228706546
  episode_reward_mean: 0.18431538002741837
  episode_reward_min: -1.0128734401362673
  episodes_this_iter: 128
  episodes_total: 54473
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5798888367414474
          entropy_coeff: 0.0
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,458,52956.2,3660336,0.184315,1.37925,-1.01287,61.1406


Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14673312
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 0.5736015505833182
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03947655572987133
    agent_0_total_ball_to_goal_speed_reward_min: -0.4105609758120331
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-28-34
  done: false
  episode_len_mean: 64.65322580645162
  episode_media: {}
  episode_reward_max: 1.0588646567848579
  episode_reward_mean: 0.19378583247260742
  episode_reward_min: -0.988709192286275
  episodes_this_iter: 124
  episodes_total: 54597
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5816890300512314
          entropy_coeff:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,459,53069.2,3668328,0.193786,1.05886,-0.988709,64.6532




Result for PPO_Soccer_99818_00000:
  agent_timesteps_total: 14705280
  custom_metrics:
    agent_0_total_ball_to_goal_speed_reward_max: 1.111516821065538
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06396839169438663
    agent_0_total_ball_to_goal_speed_reward_min: -0.6172069569580765
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-11_05-30-32
  done: false
  episode_len_mean: 65.70967741935483
  episode_media: {}
  episode_reward_max: 1.7167837254269966
  episode_reward_mean: 0.15595280564260766
  episode_reward_min: -1.921030598347008
  episodes_this_iter: 124
  episodes_total: 54721
  experiment_id: 9678828e9628402594233c225cebca19
  hostname: bruno-odyssey-mint
  info:
    learner:
      main:
        learner_stats:
          allreduce_latency: 0.0
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.0000000000000016e-05
          entropy: 0.5752486907243729
          entropy_coeff: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,460,53187,3676320,0.155953,1.71678,-1.92103,65.7097


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_99818_00000,RUNNING,192.168.0.104:78457,460,53187,3676320,0.155953,1.71678,-1.92103,65.7097


2021-12-11 05:30:33,858	ERROR tune.py:545 -- Trials did not complete: [PPO_Soccer_99818_00000]
2021-12-11 05:30:33,889	INFO tune.py:549 -- Total run time: 53261.68 seconds (53260.09 seconds for the tuning loop).


PPO_Soccer_99818_00000
/home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_rewards_1.4/PPO_Soccer_99818_00000_0_2021-12-10_14-42-52/checkpoint_000350/checkpoint-350
Done training


(<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x7f30e078de20>,
 PPO_Soccer_99818_00000,
 '/home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_rewards_1.4/PPO_Soccer_99818_00000_0_2021-12-10_14-42-52/checkpoint_000350/checkpoint-350')

## Export agent

In [12]:
# Directory that exported agents are written under.
# "__file__" is intentionally a plain string (presumably because a notebook
# kernel defines no real __file__): realpath() resolves that dummy name against
# the current working directory, and only the parent folder of the result is kept.
this_path = os.path.split(os.path.realpath("__file__"))[0]


def export_agent(agent_file: str, TRIAL: str, agent_name: str = "bajai_belzonte", makeZip: bool = False) -> None:
    """Export a trained agent as an importable package under ``<this_path>/agents``.

    The package directory ``<this_path>/agents/<agent_name>`` is recreated from
    scratch and filled with:

    * ``agent.py`` containing ``agent_file`` verbatim,
    * ``__init__.py`` re-exporting ``MyRaySoccerAgent`` from ``agent.py``,
    * a full copy of the trial directory, stored at the same path it has
      relative to ``ray_results/`` in ``TRIAL``.

    Args:
        agent_file: Source code to write to ``agent.py``.
        TRIAL: Path of the trial directory to copy. It must contain the
            substring ``"ray_results/"``; everything after it becomes the
            destination path inside the package.
        agent_name: Name of the package directory (and of the zip archive).
        makeZip: When true, also write ``<this_path>/agents/<agent_name>.zip``
            containing the ``<agent_name>/`` package directory. The archive is
            placed next to the package rather than inside it so that it never
            ends up archiving itself.

    Raises:
        IndexError: If ``TRIAL`` does not contain ``"ray_results/"``. This is
            checked before anything on disk is modified.
    """
    # Resolve the destination sub-path first: a malformed TRIAL must fail
    # before the previous export is deleted below.
    trial_subpath = TRIAL.split("ray_results/")[1]

    agents_root = os.path.join(this_path, "agents")
    agent_path = os.path.join(agents_root, agent_name)

    # Always start from an empty package directory.
    if os.path.isdir(agent_path):
        shutil.rmtree(agent_path)
    os.makedirs(agent_path)

    # Save the agent class. Python source files are UTF-8 by default, so write
    # them as UTF-8 regardless of the machine's locale.
    with open(os.path.join(agent_path, "agent.py"), "w", encoding="utf-8") as f:
        f.write(agent_file)

    # Save an __init__ so the directory is a Python module.
    with open(os.path.join(agent_path, "__init__.py"), "w", encoding="utf-8") as f:
        f.write("from .agent import MyRaySoccerAgent")

    # Copy the whole trial, including the experiment's configuration files.
    print(f"TRIALLL {TRIAL}")
    shutil.copytree(TRIAL, os.path.join(agent_path, trial_subpath))

    # Pack everything into a .zip file. root_dir/base_dir make the archive
    # contain a single top-level "<agent_name>/" folder.
    if makeZip:
        shutil.make_archive(
            os.path.join(agents_root, agent_name),
            "zip",
            root_dir=agents_root,
            base_dir=agent_name,
        )


def get_agent_file_str(ALGORITHM, CHECKPOINT, POLICY_NAME="main"):
    """Render the source text of the ``agent.py`` module of an exported agent.

    Args:
        ALGORITHM: Text substituted into the generated ``ALGORITHM`` constant,
            which the generated code hands to ``get_trainable_cls``.
        CHECKPOINT: Checkpoint path containing ``ray_results/``. Only the part
            after that marker is embedded; the generated module joins it to
            its own directory at import time.
        POLICY_NAME: Text substituted into the generated ``POLICY_NAME``
            constant, used for ``agent.get_policy``.

    Returns:
        The module source as a string.
    """
    # export_agent() copies the trial under this same relative path, which is
    # why the generated module can resolve the checkpoint next to itself.
    checkpoint_suffix = CHECKPOINT.split("ray_results/")[1]

    agent_source = f"""
import pickle
import os
from typing import Dict

import gym
import numpy as np
import ray
from ray import tune
from ray.rllib.env.base_env import BaseEnv
from ray.tune.registry import get_trainable_cls

from soccer_twos import AgentInterface

ALGORITHM = "{ALGORITHM}"
CHECKPOINT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 
    "{checkpoint_suffix}"
)
POLICY_NAME = "{POLICY_NAME}"


class MyRaySoccerAgent(AgentInterface):
    def __init__(self, env: gym.Env):
        super().__init__()
        ray.init(ignore_reinit_error=True)

        # Load configuration from checkpoint file.
        config_path = ""
        if CHECKPOINT_PATH:
            config_dir = os.path.dirname(CHECKPOINT_PATH)
            config_path = os.path.join(config_dir, "params.pkl")
            # Try parent directory.
            if not os.path.exists(config_path):
                config_path = os.path.join(config_dir, "../params.pkl")

        # Load the config from pickled.
        if os.path.exists(config_path):
            with open(config_path, "rb") as f:
                config = pickle.load(f)
        else:
            # If no config in given checkpoint -> Error.
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory!"
            )

        # no need for parallelism on evaluation
        config["num_workers"] = 0
        config["num_gpus"] = 0

        # create a dummy env since it's required but we only care about the policy
        tune.registry.register_env("DummyEnv", lambda *_: BaseEnv())
        config["env"] = "DummyEnv"

        # create the Trainer from config
        cls = get_trainable_cls(ALGORITHM)
        agent = cls(env=config["env"], config=config)
        # load state from checkpoint
        agent.restore(CHECKPOINT_PATH)
        # get policy for evaluation
        self.policy = agent.get_policy(POLICY_NAME)

    def act(self, observation: Dict[int, np.ndarray]) -> Dict[int, np.ndarray]:
        actions = {{}}
        for player_id in observation:
            # compute_single_action returns a tuple of (action, action_info, ...)
            # as we only need the action, we discard the other elements
            actions[player_id], *_ = self.policy.compute_single_action(
                observation[player_id]
            )
        return actions

"""
    return agent_source


def getAnalysis(experiment: str):
    """Build an ``Analysis`` object for an experiment.

    ``Analysis`` is not defined in this cell; it is expected to be imported
    elsewhere in the notebook (presumably from ``ray.tune`` - confirm).

    Args:
        experiment: Passed unchanged as the single argument to ``Analysis``.

    Returns:
        Whatever ``Analysis(experiment)`` produces.
    """
    analysis = Analysis(experiment)
    return analysis


def export(
    experiment="/home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_rewards_1.4/PPO_Soccer_24976_00000_0_2021-12-08_15-46-24",
    algorithm="PPO",
    metric="training_iteration",
    mode="max",
):
    """Export the best checkpoint of an experiment as an agent package.

    Selects the trial directory and then the checkpoint inside it using
    ``metric``/``mode``, renders the agent module with
    ``get_agent_file_str`` and writes the package with ``export_agent``.

    Args:
        experiment: Directory handed to ``getAnalysis``. The default is the
            machine-specific path this notebook was originally run with; pass
            your own results directory on any other machine.
        algorithm: Algorithm name embedded in the generated agent module.
        metric: Metric used to rank both trials and checkpoints.
        mode: ``"max"`` or ``"min"``, forwarded with ``metric``.

    Raises:
        ValueError: If no trial directory or no checkpoint is found, raised
            before anything is written to disk.
    """
    analysis = getAnalysis(experiment)

    TRIAL = analysis.get_best_logdir(metric, mode)
    if not TRIAL:
        raise ValueError(
            f"No trial found in {experiment!r} for metric={metric!r}, mode={mode!r}"
        )

    CHECKPOINT = analysis.get_best_checkpoint(TRIAL, metric, mode)
    if not CHECKPOINT:
        # Fail here with a clear message instead of on None.split(...) later.
        raise ValueError(f"No checkpoint found for trial {TRIAL!r}")

    print(TRIAL, CHECKPOINT)
    agent_file = get_agent_file_str(algorithm, CHECKPOINT)
    export_agent(agent_file, TRIAL)


# Run the export now; the experiment directory it reads is set inside export().
export()


/home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_rewards_1.4/PPO_Soccer_99818_00000_0_2021-12-10_14-42-52 /home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_rewards_1.4/PPO_Soccer_99818_00000_0_2021-12-10_14-42-52/checkpoint_000450/checkpoint-450
TRIALLL /home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_rewards_1.4/PPO_Soccer_99818_00000_0_2021-12-10_14-42-52
