# Iniciar ambiente

## Iniciar Local

In [1]:
import os
# Presumably flags whether the notebook runs on Google Colab; it is set to
# False here for the local setup. It is not read anywhere in the visible
# part of the file -- TODO confirm usage.
isColab = False

## (Sempre) Outras configurações

In [2]:
# # Ambiente da competição
# !pip install --upgrade ceia-soccer-twos > /dev/null 2>&1
# # a versão do ray compatível com a implementação dos agentes disponibilizada é a 1.4.0
# !pip install 'aioredis==1.3.1' > /dev/null 2>&1
# !pip install 'aiohttp==3.7.4' > /dev/null 2>&1
# !pip install 'ray==1.4.0' > /dev/null 2>&1
# !pip install 'ray[rllib]==1.4.0' > /dev/null 2>&1
# !pip install 'ray[tune]==1.4.0' > /dev/null 2>&1
# !pip install torch > /dev/null 2>&1
# !pip install lz4 > /dev/null 2>&1
# !pip install GPUtil > /dev/null 2>&1

# # Dependências necessárias para gravar os vídeos
# !apt-get install -y xvfb x11-utils > /dev/null 2>&1
# !pip install 'pyvirtualdisplay==0.2.*' > /dev/null 2>&1
# !pip install tensorboard > /dev/null 2>&1


# Soccer Twos

Como tarefa bônus, experimente com os algoritmos aprendidos no ambiente `soccer_twos`, que será utilizado na competição final deste curso*. Para facilitar, utilize a variação `team_vs_policy` como no laboratório anterior.

<img src="https://raw.githubusercontent.com/bryanoliveira/soccer-twos-env/master/images/screenshot.png" height="400">

> Visualização do ambiente

Este ambiente consiste em um jogo de futebol de carros 2x2, ou seja, o objetivo é marcar um gol no adversário o mais rápido possível. Na variação `team_vs_policy`, seu agente controla um jogador do time azul e joga contra um time aleatório. Mais informações sobre o ambiente podem ser encontradas [no repositório](https://github.com/bryanoliveira/soccer-twos-env) e [na documentação do Unity ml-agents](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Examples.md#soccer-twos).


**Sua tarefa é treinar um agente com a interface do Ray apresentada, experimentando com diferentes algoritmos e hiperparâmetros.**


<br>

*A variação utilizada na competição será a `multiagent_player`, mas agentes treinados para `team_vs_policy` podem ser facilmente adaptados. Na seção "Exportando seu agente treinado" o agente "MyDqnSoccerAgent" faz exatamente isso.

## Imports

In [1]:
import gym

import ray
from ray import tune
from ray.tune import Analysis
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.env import BaseEnv
from ray.rllib.evaluation.episode import MultiAgentEpisode
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.policy import Policy
from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.utils.typing import PolicyID

import numpy as np
from typing import Any, Dict, List, Union, Optional
from collections import deque

import soccer_twos
from soccer_twos import EnvType

import shutil



## Wrapper

In [4]:
import gym
from typing import Any, Dict, List, Union

from ray.rllib.env.multi_agent_env import MultiAgentEnv
import numpy as np
from collections import deque

# Step budgets: MAX_STEPS scales the per-step penalty (1 / MAX_STEPS) and
# MATCH_STEPS is the match length used to extrapolate goals per match.
MAX_STEPS = 1_000
MATCH_STEPS = 4_000

def get_scalar_projection(x, y):
    """Return the signed length of the projection of ``x`` onto ``y``.

    The result is ``dot(x, y) / ||y||``: positive when ``x`` points along
    ``y``, negative when it points against it.
    """
    length_of_y = np.linalg.norm(y)
    dot_product = np.dot(x, y)
    return dot_product / length_of_y


# The following values were obtained experimentally by running
# pre-experiments. Several other quantities (goal positions, normalisation
# factors, ...) are derived from them.
min_ball_position_x, max_ball_position_x = (
    -15.563264846801758, 15.682827949523926)
min_ball_position_y, max_ball_position_y = -7.08929967880249, 7.223850250244141
min_player_position_x, max_player_position_x = (
    -17.26804542541504, 17.16301727294922)
min_player_position_y, max_player_position_y = (
    -7.399587631225586, 7.406457424163818)
# The minimum is negative (ball moving away from the goal). It used to be
# written with a doubled minus sign, which silently made it positive.
min_ball_to_goal_avg_velocity, max_ball_to_goal_avg_velocity = (
    -23.366606239568615, 23.749571761530724)

max_ball_abs_velocity = 78.25721740722656

# Running-maximum trackers. In this file they are only named in ``global``
# declarations and in commented-out measurement code.
max_goals_one_team = -9999999
max_goals_one_match = -9999999
max_steps = -999999

max_diff_reward = -np.inf

# Inferred: largest magnitude of the ball-to-goal average velocity, used to
# normalise the speed reward.
max_ball_abs_avg_velocity = max(
    abs(min_ball_to_goal_avg_velocity), abs(max_ball_to_goal_avg_velocity))

# Weight of the ball-to-goal speed term in the shaped reward.
SPEED_IMPORTANCE = 1.0 / 14.0
# When True, the speed term is clipped to
# [-SPEED_IMPORTANCE, SPEED_IMPORTANCE].
CLIP_SPEED_REWARD_BY_SPEED_IMPORTANCE = True

# Magnitude of the per-step penalty applied while ``is_after_the_ball`` is
# true for a player.
AFTER_BALL_STEP_PENALTY = 1 / MAX_STEPS  # 0.001

# NOTE: this hyperparameter must not be changed without re-measuring
# min_ball_to_goal_avg_velocity and max_ball_to_goal_avg_velocity.
AVG_SPEED_TIMESTEPS_WINDOW = 1


def is_after_the_ball(player_id: int, player_pos: np.array, ball_pos: np.array):
    """Tell whether a player is past the ball along the x axis.

    Players 0 and 1 count as "after the ball" when their x coordinate is
    greater than the ball's; players 2 and 3 when it is smaller. Any other
    ``player_id`` yields ``None``.
    """
    if player_id in (0, 1):
        past_ball = player_pos[0] > ball_pos[0]
    elif player_id in (2, 3):
        past_ball = player_pos[0] < ball_pos[0]
    else:
        past_ball = None
    return past_ball


def get_center_of_goal_pos(player_id):
    """Return the goal-centre point used as the target for ``player_id``.

    Players 0 and 1 target ``(max_ball_position_x, 0.0)``; players 2 and 3
    target ``(min_ball_position_x, 0.0)``. Any other id yields ``None``.
    """
    if player_id in (0, 1):
        goal_x = max_ball_position_x
    elif player_id in (2, 3):
        goal_x = min_ball_position_x
    else:
        return None
    return np.array([goal_x, 0.0])


def calculate_ball_to_goal_scalar_velocity(player_id: int, info: Dict):
    """Return the ball's speed component towards ``player_id``'s target goal.

    Args:
        player_id: id passed to ``get_center_of_goal_pos`` to pick the goal.
        info: per-agent info dict; ``info["ball_info"]`` must provide
            ``"position"`` and ``"velocity"``.

    Returns:
        The scalar projection of the ball velocity onto the vector that goes
        from the ball to the goal centre (positive when the ball moves
        towards that goal).
    """
    target = get_center_of_goal_pos(player_id)
    ball = info["ball_info"]
    ball_to_goal = target - ball["position"]
    return get_scalar_projection(ball["velocity"], ball_to_goal)

# print('ball_velocity_to_center_of_goal', calculate_ball_to_goal_scalar_velocity(0, { "ball_info": { "position": np.array([3.0, 2.0]), "velocity": np.array([0.0, 0.0]) }}))


class CustomRewardWrapper(gym.core.Wrapper, MultiAgentEnv):
    """Multi-agent wrapper adding shaped reward terms and episode metrics.

    On top of the reward returned by the wrapped environment, each agent
    receives:

    * a term proportional to the windowed mean speed of the ball towards the
      goal centre given by ``get_center_of_goal_pos`` for that agent
      (weighted by ``SPEED_IMPORTANCE``), and
    * ``-AFTER_BALL_STEP_PENALTY`` on every step in which
      ``is_after_the_ball`` is true for that agent.

    Every ``info[agent_id]`` dict is extended with an ``"ep_metrics"`` entry
    holding step and goal counters plus the three reward components.

    ``reset`` must be called before ``step``: it creates the per-episode
    state (step counter, speed windows and scoreboard).
    """

    def step(self, action: Union[Dict[int, List[Any]], List[Any]]):
        """Step the wrapped env and return shaped rewards and enriched info.

        Args:
            action: dict mapping agent id to that agent's action. Non-dict
                actions are not supported.

        Returns:
            ``(obs, new_rewards, done, info)`` where ``new_rewards`` maps
            each agent id to the sum of its three reward components and
            ``info`` carries an extra ``"ep_metrics"`` dict per agent.

        Raises:
            NotImplementedError: if ``action`` is not a dict.
        """
        obs, rewards, done, info = super().step(action)

        if not isinstance(action, dict):
            raise NotImplementedError('Necessário implementar!')

        # Evaluate the reward exactly once per agent. ``_calculate_reward``
        # updates the scoreboard and the speed window, so evaluating it
        # twice per step would count every goal twice.
        reward_parts = {
            agent_id: self._calculate_reward(
                rewards[agent_id], agent_id, agent_info,
                splitted_returns=True)
            for agent_id, agent_info in info.items()
        }
        new_rewards = {
            agent_id: env_part + speed_part + position_part
            for agent_id, (env_part, speed_part, position_part)
            in reward_parts.items()
        }

        team_0_goals = self.scoreboard["team_0"]
        team_1_goals = self.scoreboard["team_1"]
        total_goals = team_0_goals + team_1_goals
        info = {
            agent_id: {
                **agent_info,
                "ep_metrics": {
                    "total_timesteps": self.n_step + 1,
                    "total_goals": total_goals,
                    # Agents 0 and 1 are on team_0, the others on team_1.
                    "goals_opponent": (
                        team_1_goals if agent_id in range(2)
                        else team_0_goals),
                    "goals_in_favor": (
                        team_0_goals if agent_id in range(2)
                        else team_1_goals),
                    "team_0_goals": team_0_goals,
                    "team_1_goals": team_1_goals,
                    "episode_ended": done["__all__"],
                    "have_goals": total_goals > 0,
                    "env_reward": reward_parts[agent_id][0],
                    "ball_to_goal_speed_reward": reward_parts[agent_id][1],
                    "agent_position_to_ball_reward":
                        reward_parts[agent_id][2],
                },
            }
            for agent_id, agent_info in info.items()
        }

        self.n_step += 1
        return obs, new_rewards, done, info

    def reset(self, **kwargs):
        """Reset the wrapped env and all per-episode bookkeeping."""
        obs = super().reset(**kwargs)
        self.n_step = 0
        self.last_ball_speed_mean_per_player = {
            player_id: 0.0 for player_id in range(4)}
        # One sliding window of ball-to-goal speeds per player.
        self.ball_speed_deque_per_player = {
            player_id: deque(maxlen=AVG_SPEED_TIMESTEPS_WINDOW)
            for player_id in range(4)}
        self.scoreboard = {"team_0": 0, "team_1": 0}
        self.await_press = False
        return obs

    def _calculate_reward(self, reward: float, player_id: int, info: Dict,
                          splitted_returns=False) -> Union[float, tuple]:
        """Compute the shaped reward of one agent for the current step.

        Side effects: updates the scoreboard when ``reward`` is non-zero and
        pushes the current ball-to-goal speed into the agent's window, so it
        must be called once per agent per step.

        Args:
            reward: reward the wrapped environment gave to this agent.
            player_id: agent id (0-3).
            info: this agent's info dict; must provide
                ``info["ball_info"]`` (``"position"``, ``"velocity"``) and
                ``info["player_info"]["position"]``.
            splitted_returns: when True, return the components separately.

        Returns:
            The sum of the three components, or the tuple
            ``(env_reward, ball_to_goal_speed_reward,
            agent_position_to_ball_reward)`` when ``splitted_returns`` is
            True.
        """
        if reward != 0.0:
            self._update_scoreboard(player_id, reward)

        self._update_avg_ball_speed_to_goal(
            player_id, calculate_ball_to_goal_scalar_velocity(player_id, info))

        ball_pos = info["ball_info"]["position"]
        player_pos = info["player_info"]["position"]

        env_reward = reward

        # Mean ball-to-goal speed, normalised by its largest measured
        # magnitude and weighted by SPEED_IMPORTANCE.
        ball_to_goal_speed_reward = (
            SPEED_IMPORTANCE
            * self.last_ball_speed_mean_per_player[player_id]
            / max_ball_abs_avg_velocity)
        if CLIP_SPEED_REWARD_BY_SPEED_IMPORTANCE:
            ball_to_goal_speed_reward = np.clip(
                ball_to_goal_speed_reward,
                -SPEED_IMPORTANCE, SPEED_IMPORTANCE)

        # Penalise the agent while it is past the ball.
        agent_position_to_ball_reward = is_after_the_ball(
            player_id, player_pos, ball_pos) * (-AFTER_BALL_STEP_PENALTY)

        if splitted_returns:
            return (env_reward, ball_to_goal_speed_reward,
                    agent_position_to_ball_reward)
        return (env_reward + ball_to_goal_speed_reward
                + agent_position_to_ball_reward)

    def _update_avg_ball_speed_to_goal(self, player_id: int, ball_speed: float):
        """Push ``ball_speed`` into the player's window and refresh its mean."""
        assert player_id in [0, 1, 2, 3]

        window = self.ball_speed_deque_per_player[player_id]
        window.append(ball_speed)
        self.last_ball_speed_mean_per_player[player_id] = np.mean(window)

    def _update_scoreboard(self, player_id, reward):
        """Register a goal based on the reward of one agent per team.

        Only agents 0 and 2 are inspected, so a goal is registered at most
        once per team: a reward of exactly ``-1.0`` for agent 0 adds a goal
        to ``team_1`` and a reward of exactly ``-1.0`` for agent 2 adds a
        goal to ``team_0``. Presumably ``-1.0`` is what the environment
        gives to the conceding team -- confirm against the environment docs.
        """
        if reward != -1.0:
            return
        if player_id == 0:
            self.scoreboard["team_1"] += 1
        elif player_id == 2:
            self.scoreboard["team_0"] += 1


## Utils

In [5]:
class RLLibWrapper(gym.core.Wrapper, MultiAgentEnv):
    """Thin adapter exposing a wrapped env as an RLlib ``MultiAgentEnv``.

    It adds no behaviour of its own: everything is inherited from
    ``gym.core.Wrapper`` and ``MultiAgentEnv``.
    """


def create_rllib_env(env_config: Optional[dict] = None):
    """
    Creates a RLLib environment and prepares it to be instantiated by Ray workers.
    Args:
        env_config: configuration for the environment. ``None`` (the default)
            is treated as an empty configuration.
            You may specify the following keys:
            - variation: one of soccer_twos.EnvType. Defaults to EnvType.multiagent_player.
            - opponent_policy: a Callable for your agent to train against. Defaults to a random policy.
            - multiagent: set to False to get the raw environment back
              instead of one wrapped in ``RLLibWrapper``.
    Returns:
        The environment built by ``soccer_twos.make``, wrapped in
        ``RLLibWrapper`` unless ``multiagent`` is explicitly falsy.

    Note: when ``env_config`` exposes ``worker_index`` / ``vector_index``
    attributes, a ``worker_id`` entry is written into it in place.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if env_config is None:
        env_config = {}
    if hasattr(env_config, "worker_index"):
        # Derive a distinct id for every (worker, vectorised env) pair.
        env_config["worker_id"] = (
            env_config.worker_index * env_config.get("num_envs_per_worker", 1)
            + env_config.vector_index
        )
    env = soccer_twos.make(**env_config)
    if not env_config.get("multiagent", True):
        # is multiagent by default, is only disabled if explicitly set to False
        return env
    return RLLibWrapper(env)


def create_custom_env(env_config: Optional[dict] = None):
    """Create the RLlib soccer env wrapped in ``CustomRewardWrapper``.

    Args:
        env_config: configuration forwarded to ``create_rllib_env``.
            ``None`` (the default) is treated as an empty configuration.

    Returns:
        A ``CustomRewardWrapper`` around the environment returned by
        ``create_rllib_env``.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if env_config is None:
        env_config = {}
    return CustomRewardWrapper(create_rllib_env(env_config))

## Callback

In [6]:
class Callback(DefaultCallbacks):
    """RLlib callbacks that total the reward components of each agent.

    After every step the ``"ep_metrics"`` entry of each agent's last info
    dict is read and its three reward components are added to running totals
    kept in ``episode.user_data[agent_id]``. Agent 0's totals are published
    through ``episode.custom_metrics``.

    Each environment step is accumulated at most once: the step number
    (``ep_metrics["total_timesteps"]``) of the last accumulated info is
    remembered, so ``on_episode_end`` does not add the final step a second
    time when ``on_episode_step`` has already handled it.
    """

    # Agents whose ``ep_metrics`` are accumulated in ``episode.user_data``.
    _AGENT_IDS = (0, 1, 2, 3)
    # ``ep_metrics`` keys holding the individual reward components; the
    # matching running total is stored under ``"total_" + key``.
    _REWARD_COMPONENTS = (
        "env_reward",
        "ball_to_goal_speed_reward",
        "agent_position_to_ball_reward",
    )
    # ``episode.user_data`` key remembering the last accumulated step number.
    _LAST_STEP_KEY = "last_accumulated_timestep"

    def on_episode_step(self,
                        *,
                        worker: "RolloutWorker",
                        base_env: BaseEnv,
                        episode: MultiAgentEpisode,
                        env_index: Optional[int] = None,
                        **kwargs) -> None:
        """Accumulate the latest step's reward components and publish them."""
        self._accumulate_last_step(episode)
        self._publish_custom_metrics(episode)

    def on_episode_end(self,
                       *,
                       worker: "RolloutWorker",
                       base_env: BaseEnv,
                       policies: Dict[PolicyID, Policy],
                       episode: MultiAgentEpisode,
                       env_index: Optional[int] = None,
                       **kwargs) -> None:
        """Publish the final totals, adding the last step only if missing."""
        self._accumulate_last_step(episode)
        self._publish_custom_metrics(episode)

    def _accumulate_last_step(self, episode: MultiAgentEpisode) -> None:
        """Add the last info's reward components to the running totals.

        Does nothing when the step reported by agent 0's last info has
        already been accumulated for this episode.
        """
        timestep = episode.last_info_for(0)["ep_metrics"]["total_timesteps"]
        if episode.user_data.get(self._LAST_STEP_KEY) == timestep:
            return

        for agent_id in self._AGENT_IDS:
            step_metrics = episode.last_info_for(agent_id)["ep_metrics"]
            totals = episode.user_data.setdefault(
                agent_id,
                {"total_" + name: 0.0 for name in self._REWARD_COMPONENTS})
            for name in self._REWARD_COMPONENTS:
                totals["total_" + name] += step_metrics[name]

        episode.user_data[self._LAST_STEP_KEY] = timestep

    def _publish_custom_metrics(self, episode: MultiAgentEpisode) -> None:
        """Expose agent 0's running totals as the episode's custom metrics."""
        agent_0_totals = episode.user_data[0]
        episode.custom_metrics = {
            "agent_0_total_env_reward":
                agent_0_totals["total_env_reward"],
            "agent_0_total_ball_to_goal_speed_reward":
                agent_0_totals["total_ball_to_goal_speed_reward"],
            "agent_0_total_agent_position_to_ball_reward":
                agent_0_totals["total_agent_position_to_ball_reward"],
        }


## Stop

In [7]:
# Stopping criteria passed to ``tune.run``: stop after 15M timesteps.
# Alternatives that were tried: "time_total_s": 14400 (4h), "episodes_total": 1.
stop = dict(timesteps_total=15_000_000)


## Config


In [8]:
# NUM_ENVS_PER_WORKER = 1
# Number of environment copies per worker. It is used both in the
# environment config and in the trainer's "num_envs_per_worker" setting.
NUM_ENVS_PER_WORKER = 4
# Name used as the trainer's "env" entry; run_experiment registers
# create_custom_env under this same name.
ENVIRONMENT_ID = "Soccer"

# Keyword configuration handed to the environment factory.
ENVIRONMENT_CONFIG = {
    "num_envs_per_worker": NUM_ENVS_PER_WORKER,
    "variation": EnvType.multiagent_player,
}


# Build a throwaway environment only to read its observation and action
# spaces, then close it before training starts.
temp_env = create_custom_env(ENVIRONMENT_CONFIG)
obs_space = temp_env.observation_space
act_space = temp_env.action_space
temp_env.close()


# Trainer configuration passed to tune.run in run_experiment.
config = {
    # system settings
    "num_gpus": 1,
    # "num_workers": 3,
    "num_workers": 0,
    "num_envs_per_worker": NUM_ENVS_PER_WORKER,
    "num_cpus_for_driver": 8,
    "num_cpus_per_worker": 1,
    "num_gpus_per_worker": 1,
    "log_level": "INFO",
    "framework": "torch",
    # RL setup
    "multiagent": {
        # A single policy named "default", built from the spaces read above.
        "policies": {
            "default": (None, obs_space, act_space, {}),
        },
        # Every agent id is mapped to that one shared policy.
        "policy_mapping_fn": tune.function(lambda _: "default"),
        "policies_to_train": ["default"],
    },
    "env": ENVIRONMENT_ID,
    "env_config": ENVIRONMENT_CONFIG,
    # Callback class that accumulates the per-component reward metrics.
    "callbacks": Callback,
}

[INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0


INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0


[INFO] Connected new brain: SoccerTwos?team=1


INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1


[INFO] Connected new brain: SoccerTwos?team=0


INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


## Run experiment

In [9]:
def run_experiment(resume=True):
    """Train PPO on the registered Soccer environment and report the best result.

    Initialises Ray, registers ``create_custom_env`` under ``ENVIRONMENT_ID``,
    runs a single-sample Tune experiment with the module-level ``config`` and
    ``stop`` dictionaries, and then looks up the trial and checkpoint with the
    highest ``episode_reward_mean``.

    Args:
        resume: Forwarded unchanged to ``tune.run(resume=...)``. Defaults to
            ``True`` (the value that used to be hard-coded) so existing calls
            keep continuing the experiment saved under ``local_dir``/``name``.
            Pass ``False`` to start a fresh experiment, for example when there
            is no earlier run to continue.

    Returns:
        A tuple ``(analysis, best_trial, best_checkpoint)``.
    """
    # ignore_reinit_error lets the notebook cell be re-run without restarting Ray.
    ray.init(num_cpus=8, include_dashboard=False, ignore_reinit_error=True)

    tune.registry.register_env(ENVIRONMENT_ID, create_custom_env)

    analysis = tune.run(
        "PPO",
        num_samples=1,
        name="PPO_multiagent_player_custom_rewards",
        # name="Measuring_rewards",
        config=config,
        stop=stop,
        checkpoint_freq=100,
        checkpoint_at_end=True,
        local_dir="../../ray_results",
        # restore="../../ray_results/PPO_selfplay_1/PPO_Soccer_ID/checkpoint_00X/checkpoint-X",
        resume=resume,
    )

    # Best trial = highest mean episode reward.
    best_trial = analysis.get_best_trial("episode_reward_mean", mode="max")
    print(best_trial)
    # Best checkpoint of that trial, ranked by the same metric.
    best_checkpoint = analysis.get_best_checkpoint(
        trial=best_trial, metric="episode_reward_mean", mode="max"
    )
    print(best_checkpoint)
    print("Done training")
    return analysis, best_trial, best_checkpoint


run_experiment()


2021-12-03 14:03:59,695	INFO tune.py:467 -- TrialRunner resumed, ignoring new add_experiment.


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,PENDING,,2480,197853,9920000,-0.494477,0.119272,-2.20994,116.11


[2m[36m(pid=95010)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=95010)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=95010)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0
[2m[36m(pid=95010)[0m 2021-12-03 14:04:15,209	INFO torch_policy.py:148 -- TorchPolicy (worker=local) running on 1 GPU(s).
[2m[36m(pid=95010)[0m 2021-12-03 14:04:17,597	INFO rollout_worker.py:1199 -- Built policy map: {'default': <ray.rllib.policy.policy_template.PPOTorchPolicy object at 0x7fa8407c35e0>}
[2m[36m(pid=95010)[0m 2021-12-03 14:04:17,597	INFO rollout_worker.py:1200 -- Built preprocessor map: {'default': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x7fa8407c3c40>}
[2m[36m(pid=95010)[0m 2021-12-03 14:04:17,597	INFO rollout_worker.py:583 -- Built filter map: {'default': <ray.rllib.utils.filter.NoFilter object at 0x7fa

[2m[36m(pid=95010)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=95010)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=95010)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


[2m[36m(pid=95010)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=95010)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=95010)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


[2m[36m(pid=95010)[0m [INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=95010)[0m [INFO] Connected new brain: SoccerTwos?team=1
[2m[36m(pid=95010)[0m [INFO] Connected new brain: SoccerTwos?team=0


[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0
[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=1
[2m[36m(pid=95010)[0m INFO:mlagents_envs.environment:Connected new brain: SoccerTwos?team=0


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,,2479,197770,9916000,-0.517873,0.119272,-1.65443,119.44


[2m[36m(pid=95010)[0m 2021-12-03 14:04:20,093	INFO trainable.py:377 -- Restored on 192.168.0.108 from checkpoint: /home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_player_custom_rewards/PPO_Soccer_491c2_00000_0_2021-11-30_01-16-26/tmpqan_d6iirestore_from_object/checkpoint-2479
[2m[36m(pid=95010)[0m 2021-12-03 14:04:20,094	INFO trainable.py:385 -- Current state after restoring: {'_iteration': 2479, '_timesteps_total': None, '_time_total': 197770.27837729454, '_episodes_total': 65688}
[2m[36m(pid=95010)[0m 2021-12-03 14:04:20,096	INFO rollout_worker.py:723 -- Generating sample batch of size 800
[2m[36m(pid=95010)[0m 2021-12-03 14:04:20,117	INFO sampler.py:590 -- Raw obs from env: { 0: { 0: np.ndarray((336,), dtype=float32, min=0.0, max=1.0, mean=0.187),
[2m[36m(pid=95010)[0m        1: np.ndarray((336,), dtype=float32, min=0.0, max=1.0, mean=0.194),
[2m[36m(pid=95010)[0m        2: np.ndarray((336,), dtype=float32, min=0.0, max=1.0, mean=0.193),
[2m

Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39680000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.030434782608695674
    agent_0_total_agent_position_to_ball_reward_min: -0.1220000000000001
    agent_0_total_ball_to_goal_speed_reward_max: 0.6074068834671137
    agent_0_total_ball_to_goal_speed_reward_mean: 0.154306348065058
    agent_0_total_ball_to_goal_speed_reward_min: -0.7037157627838031
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-05-48
  done: false
  episode_len_mean: 146.04347826086956
  episode_media: {}
  episode_reward_max: 0.00267001546018264
  episode_reward_mean: -0.5504383065997986
  episode_reward_min: -1.082138190402616
  episodes_this_iter: 23
  episodes_total: 65711
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      def

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2480,197859,9920000,-0.550438,0.00267002,-1.08214,146.043


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39696000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03800000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.23200000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.6496571387857252
    agent_0_total_ball_to_goal_speed_reward_mean: 0.09823728055330812
    agent_0_total_ball_to_goal_speed_reward_min: -0.7466784377563417
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-07-08
  done: false
  episode_len_mean: 150.65384615384616
  episode_media: {}
  episode_reward_max: 0.00267001546018264
  episode_reward_mean: -0.6158653884362626
  episode_reward_min: -1.774938172026876
  episodes_this_iter: 29
  episodes_total: 65740
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      d

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2481,197939,9924000,-0.615865,0.00267002,-1.77494,150.654


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39712000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03224444444444447
    agent_0_total_agent_position_to_ball_reward_min: -0.23200000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.6496571387857252
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0147198928599845
    agent_0_total_ball_to_goal_speed_reward_min: -0.7466784377563417
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-08-27
  done: false
  episode_len_mean: 127.8
  episode_media: {}
  episode_reward_max: 0.00267001546018264
  episode_reward_mean: -0.5330319403449457
  episode_reward_min: -1.774938172026876
  episodes_this_iter: 38
  episodes_total: 65778
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2482,198017,9928000,-0.533032,0.00267002,-1.77494,127.8


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39728000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.040260000000000025
    agent_0_total_agent_position_to_ball_reward_min: -0.23200000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.6496571387857252
    agent_0_total_ball_to_goal_speed_reward_mean: -0.016698843692278052
    agent_0_total_ball_to_goal_speed_reward_min: -0.7517809184110251
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-09-57
  done: false
  episode_len_mean: 139.98
  episode_media: {}
  episode_reward_max: -0.008873354217834528
  episode_reward_mean: -0.5858106915395289
  episode_reward_min: -2.1222816106321862
  episodes_this_iter: 26
  episodes_total: 65804
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2483,198096,9932000,-0.585811,-0.00887335,-2.12228,139.98


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39744000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03446000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.20100000000000015
    agent_0_total_ball_to_goal_speed_reward_max: 0.6217532994183699
    agent_0_total_ball_to_goal_speed_reward_mean: -0.10068537221545683
    agent_0_total_ball_to_goal_speed_reward_min: -0.7517809184110251
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-11-16
  done: false
  episode_len_mean: 121.9
  episode_media: {}
  episode_reward_max: -0.008873354217834528
  episode_reward_mean: -0.5234663910767436
  episode_reward_min: -2.1222816106321862
  episodes_this_iter: 35
  episodes_total: 65839
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2484,198174,9936000,-0.523466,-0.00887335,-2.12228,121.9


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39760000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.036540000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.20100000000000015
    agent_0_total_ball_to_goal_speed_reward_max: 0.6360079006226563
    agent_0_total_ball_to_goal_speed_reward_mean: -0.0929184603510721
    agent_0_total_ball_to_goal_speed_reward_min: -0.7517809184110251
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-12-35
  done: false
  episode_len_mean: 123.38
  episode_media: {}
  episode_reward_max: -0.08599757407417519
  episode_reward_mean: -0.5214243677322988
  episode_reward_min: -2.1222816106321862
  episodes_this_iter: 40
  episodes_total: 65879
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2485,198253,9940000,-0.521424,-0.0859976,-2.12228,123.38


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39776000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.028410000000000015
    agent_0_total_agent_position_to_ball_reward_min: -0.17600000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.6360079006226563
    agent_0_total_ball_to_goal_speed_reward_mean: -0.09313664430051947
    agent_0_total_ball_to_goal_speed_reward_min: -0.7166576932880108
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-14-08
  done: false
  episode_len_mean: 106.38
  episode_media: {}
  episode_reward_max: -0.06611504518707001
  episode_reward_mean: -0.4542770580523743
  episode_reward_min: -1.4714133806991265
  episodes_this_iter: 37
  episodes_total: 65916
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2486,198333,9944000,-0.454277,-0.066115,-1.47141,106.38


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39792000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02875000000000001
    agent_0_total_agent_position_to_ball_reward_min: -0.17600000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.6360079006226563
    agent_0_total_ball_to_goal_speed_reward_mean: -0.08606697361438222
    agent_0_total_ball_to_goal_speed_reward_min: -0.7166576932880108
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-15-25
  done: false
  episode_len_mean: 107.28
  episode_media: {}
  episode_reward_max: -0.021579712165600506
  episode_reward_mean: -0.4706478812939555
  episode_reward_min: -1.4714133806991265
  episodes_this_iter: 32
  episodes_total: 65948
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2487,198411,9948000,-0.470648,-0.0215797,-1.47141,107.28


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39808000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02875000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.18100000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.5169629551089108
    agent_0_total_ball_to_goal_speed_reward_mean: -0.02130745651518472
    agent_0_total_ball_to_goal_speed_reward_min: -0.7166576932880108
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-16-43
  done: false
  episode_len_mean: 111.12
  episode_media: {}
  episode_reward_max: -0.021579712165600506
  episode_reward_mean: -0.48197058348323146
  episode_reward_min: -1.6893886974033705
  episodes_this_iter: 36
  episodes_total: 65984
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2488,198488,9952000,-0.481971,-0.0215797,-1.68939,111.12


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39824000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.029480000000000017
    agent_0_total_agent_position_to_ball_reward_min: -0.18100000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.5336803281987541
    agent_0_total_ball_to_goal_speed_reward_mean: 0.053804924169248904
    agent_0_total_ball_to_goal_speed_reward_min: -0.7626271762248024
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-18-12
  done: false
  episode_len_mean: 119.29
  episode_media: {}
  episode_reward_max: 0.03616411568542999
  episode_reward_mean: -0.4954729223756669
  episode_reward_min: -1.744348045066972
  episodes_this_iter: 31
  episodes_total: 66015
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2489,198566,9956000,-0.495473,0.0361641,-1.74435,119.29


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39840000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.026680000000000023
    agent_0_total_agent_position_to_ball_reward_min: -0.17500000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.5336803281987541
    agent_0_total_ball_to_goal_speed_reward_mean: -0.0017522112579068144
    agent_0_total_ball_to_goal_speed_reward_min: -0.7626271762248024
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-19-30
  done: false
  episode_len_mean: 121.39
  episode_media: {}
  episode_reward_max: 0.07659804758422295
  episode_reward_mean: -0.47794464021798894
  episode_reward_min: -1.744348045066972
  episodes_this_iter: 35
  episodes_total: 66050
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2490,198643,9960000,-0.477945,0.076598,-1.74435,121.39


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39856000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02769000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.17500000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.5336803281987541
    agent_0_total_ball_to_goal_speed_reward_mean: -0.08763521382919837
    agent_0_total_ball_to_goal_speed_reward_min: -0.7626271762248024
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-20-48
  done: false
  episode_len_mean: 120.42
  episode_media: {}
  episode_reward_max: 0.07659804758422295
  episode_reward_mean: -0.49550818149487846
  episode_reward_min: -1.7610155945929604
  episodes_this_iter: 31
  episodes_total: 66081
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2491,198721,9964000,-0.495508,0.076598,-1.76102,120.42


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39872000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.029550000000000017
    agent_0_total_agent_position_to_ball_reward_min: -0.23200000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7878172120253568
    agent_0_total_ball_to_goal_speed_reward_mean: -0.07577343556027558
    agent_0_total_ball_to_goal_speed_reward_min: -0.7488355732580027
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-22-18
  done: false
  episode_len_mean: 125.89
  episode_media: {}
  episode_reward_max: 0.07793262511977184
  episode_reward_mean: -0.4903769188185621
  episode_reward_min: -1.8538853941033753
  episodes_this_iter: 32
  episodes_total: 66113
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2492,198798,9968000,-0.490377,0.0779326,-1.85389,125.89


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39888000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02748000000000001
    agent_0_total_agent_position_to_ball_reward_min: -0.23200000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7878172120253568
    agent_0_total_ball_to_goal_speed_reward_mean: 0.01295665140582814
    agent_0_total_ball_to_goal_speed_reward_min: -0.7488355732580027
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-23-35
  done: false
  episode_len_mean: 119.34
  episode_media: {}
  episode_reward_max: 0.15260712160429923
  episode_reward_mean: -0.4686383940198146
  episode_reward_min: -1.8538853941033753
  episodes_this_iter: 37
  episodes_total: 66150
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2493,198876,9972000,-0.468638,0.152607,-1.85389,119.34


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39904000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.023930000000000017
    agent_0_total_agent_position_to_ball_reward_min: -0.1470000000000001
    agent_0_total_ball_to_goal_speed_reward_max: 0.7878172120253568
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08083872052910065
    agent_0_total_ball_to_goal_speed_reward_min: -0.6519996540276433
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-24-54
  done: false
  episode_len_mean: 107.7
  episode_media: {}
  episode_reward_max: 0.15260712160429923
  episode_reward_mean: -0.41961780457686026
  episode_reward_min: -1.3906448139130534
  episodes_this_iter: 40
  episodes_total: 66190
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2494,198955,9976000,-0.419618,0.152607,-1.39064,107.7


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39920000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.027350000000000027
    agent_0_total_agent_position_to_ball_reward_min: -0.19200000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.6296480219959063
    agent_0_total_ball_to_goal_speed_reward_mean: 0.01724883806130557
    agent_0_total_ball_to_goal_speed_reward_min: -0.7190389553176083
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-26-13
  done: false
  episode_len_mean: 108.76
  episode_media: {}
  episode_reward_max: -0.02516309321953969
  episode_reward_mean: -0.44553192773974587
  episode_reward_min: -1.8942997939929795
  episodes_this_iter: 30
  episodes_total: 66220
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2495,199033,9980000,-0.445532,-0.0251631,-1.8943,108.76


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39936000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03225000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.19200000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.620505485286237
    agent_0_total_ball_to_goal_speed_reward_mean: -0.0545773288911821
    agent_0_total_ball_to_goal_speed_reward_min: -0.7382932284877082
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-27-44
  done: false
  episode_len_mean: 112.04
  episode_media: {}
  episode_reward_max: 0.04111457240494554
  episode_reward_mean: -0.4701085579967776
  episode_reward_min: -2.114264759612092
  episodes_this_iter: 37
  episodes_total: 66257
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2496,199112,9984000,-0.470109,0.0411146,-2.11426,112.04


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39952000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03307000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.21500000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.620505485286237
    agent_0_total_ball_to_goal_speed_reward_mean: -0.07944639223861891
    agent_0_total_ball_to_goal_speed_reward_min: -0.7382932284877082
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-29-03
  done: false
  episode_len_mean: 117.71
  episode_media: {}
  episode_reward_max: 0.04111457240494554
  episode_reward_mean: -0.489500689887824
  episode_reward_min: -2.114264759612092
  episodes_this_iter: 36
  episodes_total: 66293
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2497,199191,9988000,-0.489501,0.0411146,-2.11426,117.71


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39968000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03073000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.21500000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.620505485286237
    agent_0_total_ball_to_goal_speed_reward_mean: -0.1080607658354351
    agent_0_total_ball_to_goal_speed_reward_min: -0.6680325983935506
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-30-20
  done: false
  episode_len_mean: 112.43
  episode_media: {}
  episode_reward_max: 0.19519172979715904
  episode_reward_mean: -0.4544664303943551
  episode_reward_min: -2.0017598642956105
  episodes_this_iter: 35
  episodes_total: 66328
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2498,199268,9992000,-0.454466,0.195192,-2.00176,112.43


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 39984000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.031560000000000026
    agent_0_total_agent_position_to_ball_reward_min: -0.21500000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.6317769865945186
    agent_0_total_ball_to_goal_speed_reward_mean: -0.07021151719721819
    agent_0_total_ball_to_goal_speed_reward_min: -0.6680325983935506
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-31-50
  done: false
  episode_len_mean: 120.33
  episode_media: {}
  episode_reward_max: 0.19519172979715904
  episode_reward_mean: -0.4769678082314416
  episode_reward_min: -2.0017598642956105
  episodes_this_iter: 29
  episodes_total: 66357
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2499,199346,9996000,-0.476968,0.195192,-2.00176,120.33


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40000000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03260000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.18600000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.6990673858526775
    agent_0_total_ball_to_goal_speed_reward_mean: -0.08851641251741274
    agent_0_total_ball_to_goal_speed_reward_min: -0.7615557029152958
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-33-07
  done: false
  episode_len_mean: 120.99
  episode_media: {}
  episode_reward_max: 0.19519172979715904
  episode_reward_mean: -0.48505814210247244
  episode_reward_min: -2.0017598642956105
  episodes_this_iter: 36
  episodes_total: 66393
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2500,199423,10000000,-0.485058,0.195192,-2.00176,120.99


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40016000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03855000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.3050000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6990673858526775
    agent_0_total_ball_to_goal_speed_reward_mean: -0.03062034844539262
    agent_0_total_ball_to_goal_speed_reward_min: -0.7615557029152958
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-34-25
  done: false
  episode_len_mean: 133.95
  episode_media: {}
  episode_reward_max: 0.00673367950858017
  episode_reward_mean: -0.5500460382436597
  episode_reward_min: -3.2350150322974938
  episodes_this_iter: 25
  episodes_total: 66418
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2501,199501,10004000,-0.550046,0.00673368,-3.23502,133.95


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40032000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.039990000000000026
    agent_0_total_agent_position_to_ball_reward_min: -0.3050000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7173475568661247
    agent_0_total_ball_to_goal_speed_reward_mean: -0.03188645440581579
    agent_0_total_ball_to_goal_speed_reward_min: -0.7615557029152958
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-35-55
  done: false
  episode_len_mean: 129.83
  episode_media: {}
  episode_reward_max: 0.28831834785670063
  episode_reward_mean: -0.5360400825484976
  episode_reward_min: -3.2350150322974938
  episodes_this_iter: 31
  episodes_total: 66449
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2502,199578,10008000,-0.53604,0.288318,-3.23502,129.83


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40048000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03472000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.3050000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7173475568661247
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0063956299215136994
    agent_0_total_ball_to_goal_speed_reward_min: -0.7615557029152958
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-37-12
  done: false
  episode_len_mean: 121.84
  episode_media: {}
  episode_reward_max: 0.28831834785670063
  episode_reward_mean: -0.5160158504254688
  episode_reward_min: -3.2350150322974938
  episodes_this_iter: 41
  episodes_total: 66490
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2503,199656,10012000,-0.516016,0.288318,-3.23502,121.84


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40064000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03874000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.23000000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7173475568661247
    agent_0_total_ball_to_goal_speed_reward_mean: 0.016923527106143383
    agent_0_total_ball_to_goal_speed_reward_min: -0.6506102587594709
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-38-30
  done: false
  episode_len_mean: 131.49
  episode_media: {}
  episode_reward_max: 0.28831834785670063
  episode_reward_mean: -0.5387442305925224
  episode_reward_min: -2.199459452524616
  episodes_this_iter: 21
  episodes_total: 66511
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2504,199733,10016000,-0.538744,0.288318,-2.19946,131.49


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40080000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03539000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2960000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6065535531532584
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0017746620082111698
    agent_0_total_ball_to_goal_speed_reward_min: -0.6361968775430288
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-39-47
  done: false
  episode_len_mean: 126.27
  episode_media: {}
  episode_reward_max: 0.05289471748223118
  episode_reward_mean: -0.5392487858858422
  episode_reward_min: -2.199459452524616
  episodes_this_iter: 27
  episodes_total: 66538
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2505,199810,10020000,-0.539249,0.0528947,-2.19946,126.27


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40096000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.040180000000000035
    agent_0_total_agent_position_to_ball_reward_min: -0.2960000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6065535531532584
    agent_0_total_ball_to_goal_speed_reward_mean: -0.03501445646418347
    agent_0_total_ball_to_goal_speed_reward_min: -0.7492020982766776
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-41-17
  done: false
  episode_len_mean: 142.85
  episode_media: {}
  episode_reward_max: 0.05289471748223118
  episode_reward_mean: -0.5872584037333999
  episode_reward_min: -2.2248774704612004
  episodes_this_iter: 28
  episodes_total: 66566
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2506,199888,10024000,-0.587258,0.0528947,-2.22488,142.85


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40112000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.04722000000000004
    agent_0_total_agent_position_to_ball_reward_min: -0.2960000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6065535531532584
    agent_0_total_ball_to_goal_speed_reward_mean: -0.10095889010793624
    agent_0_total_ball_to_goal_speed_reward_min: -0.7492020982766776
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-42-36
  done: false
  episode_len_mean: 160.86
  episode_media: {}
  episode_reward_max: 0.05289471748223118
  episode_reward_mean: -0.6457622428409928
  episode_reward_min: -2.2248774704612004
  episodes_this_iter: 24
  episodes_total: 66590
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2507,199967,10028000,-0.645762,0.0528947,-2.22488,160.86


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40128000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.037720000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.2630000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6065535531532584
    agent_0_total_ball_to_goal_speed_reward_mean: -0.09781808605180627
    agent_0_total_ball_to_goal_speed_reward_min: -0.7492020982766776
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-43-54
  done: false
  episode_len_mean: 139.72
  episode_media: {}
  episode_reward_max: 0.09857651622702246
  episode_reward_mean: -0.5602206723798916
  episode_reward_min: -2.2248774704612004
  episodes_this_iter: 37
  episodes_total: 66627
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2508,200045,10032000,-0.560221,0.0985765,-2.22488,139.72


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40144000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03533000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2980000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6482986017047074
    agent_0_total_ball_to_goal_speed_reward_mean: -0.035593212788625414
    agent_0_total_ball_to_goal_speed_reward_min: -0.7492020982766776
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-45-25
  done: false
  episode_len_mean: 130.35
  episode_media: {}
  episode_reward_max: 0.09857651622702246
  episode_reward_mean: -0.5156694192701464
  episode_reward_min: -2.2248774704612004
  episodes_this_iter: 35
  episodes_total: 66662
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2509,200124,10036000,-0.515669,0.0985765,-2.22488,130.35


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40160000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.027210000000000015
    agent_0_total_agent_position_to_ball_reward_min: -0.2980000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6482986017047074
    agent_0_total_ball_to_goal_speed_reward_mean: -0.006029594989708462
    agent_0_total_ball_to_goal_speed_reward_min: -0.6711470088283435
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-46-45
  done: false
  episode_len_mean: 101.87
  episode_media: {}
  episode_reward_max: 0.09857651622702246
  episode_reward_mean: -0.45236560904509415
  episode_reward_min: -1.9968332817161387
  episodes_this_iter: 45
  episodes_total: 66707
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2510,200203,10040000,-0.452366,0.0985765,-1.99683,101.87


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40176000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.029480000000000017
    agent_0_total_agent_position_to_ball_reward_min: -0.2980000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6482986017047074
    agent_0_total_ball_to_goal_speed_reward_mean: -0.030831284707123992
    agent_0_total_ball_to_goal_speed_reward_min: -0.6711470088283435
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-48-03
  done: false
  episode_len_mean: 112.24
  episode_media: {}
  episode_reward_max: 0.049440138090282915
  episode_reward_mean: -0.4720570938316017
  episode_reward_min: -1.9968332817161387
  episodes_this_iter: 31
  episodes_total: 66738
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2511,200281,10044000,-0.472057,0.0494401,-1.99683,112.24


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40192000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.026490000000000017
    agent_0_total_agent_position_to_ball_reward_min: -0.23200000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.6305108848986919
    agent_0_total_ball_to_goal_speed_reward_mean: 0.006055514512876683
    agent_0_total_ball_to_goal_speed_reward_min: -0.6711470088283435
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-49-32
  done: false
  episode_len_mean: 110.78
  episode_media: {}
  episode_reward_max: 0.1414422020535251
  episode_reward_mean: -0.4552302036349009
  episode_reward_min: -1.6333257455416046
  episodes_this_iter: 30
  episodes_total: 66768
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2512,200358,10048000,-0.45523,0.141442,-1.63333,110.78


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40208000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.029710000000000014
    agent_0_total_agent_position_to_ball_reward_min: -0.23200000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.6867680587360543
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0428471074902974
    agent_0_total_ball_to_goal_speed_reward_min: -0.6341489619409673
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-50-49
  done: false
  episode_len_mean: 124.23
  episode_media: {}
  episode_reward_max: 0.1414422020535251
  episode_reward_mean: -0.4983705515698213
  episode_reward_min: -1.8931473774307424
  episodes_this_iter: 34
  episodes_total: 66802
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2513,200435,10052000,-0.498371,0.141442,-1.89315,124.23


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40224000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.028520000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.23200000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.6867680587360543
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04257918326853292
    agent_0_total_ball_to_goal_speed_reward_min: -0.6698515059120933
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-52-11
  done: false
  episode_len_mean: 117.84
  episode_media: {}
  episode_reward_max: 0.1414422020535251
  episode_reward_mean: -0.4728228289572567
  episode_reward_min: -1.8931473774307424
  episodes_this_iter: 33
  episodes_total: 66835
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2514,200517,10056000,-0.472823,0.141442,-1.89315,117.84


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40240000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.028930000000000015
    agent_0_total_agent_position_to_ball_reward_min: -0.1550000000000001
    agent_0_total_ball_to_goal_speed_reward_max: 0.6867680587360543
    agent_0_total_ball_to_goal_speed_reward_mean: 0.030123080664442806
    agent_0_total_ball_to_goal_speed_reward_min: -0.6698515059120933
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-53-41
  done: false
  episode_len_mean: 116.64
  episode_media: {}
  episode_reward_max: 0.2325060207295535
  episode_reward_mean: -0.49028324062829015
  episode_reward_min: -1.8931473774307424
  episodes_this_iter: 33
  episodes_total: 66868
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2515,200595,10060000,-0.490283,0.232506,-1.89315,116.64


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40256000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.028400000000000016
    agent_0_total_agent_position_to_ball_reward_min: -0.17800000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.6867680587360543
    agent_0_total_ball_to_goal_speed_reward_mean: -0.024153191549186968
    agent_0_total_ball_to_goal_speed_reward_min: -0.7928248020875908
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-54-59
  done: false
  episode_len_mean: 119.78
  episode_media: {}
  episode_reward_max: 0.2325060207295535
  episode_reward_mean: -0.49015179372186163
  episode_reward_min: -2.484311974468998
  episodes_this_iter: 28
  episodes_total: 66896
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2516,200672,10064000,-0.490152,0.232506,-2.48431,119.78


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40272000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.030690000000000016
    agent_0_total_agent_position_to_ball_reward_min: -0.17800000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.6124236581994801
    agent_0_total_ball_to_goal_speed_reward_mean: -0.05603528942755316
    agent_0_total_ball_to_goal_speed_reward_min: -0.7928248020875908
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-56-17
  done: false
  episode_len_mean: 123.87
  episode_media: {}
  episode_reward_max: 0.2325060207295535
  episode_reward_mean: -0.506969926825862
  episode_reward_min: -2.484311974468998
  episodes_this_iter: 38
  episodes_total: 66934
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2517,200750,10068000,-0.50697,0.232506,-2.48431,123.87


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40288000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03286000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.18800000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.7064167422843373
    agent_0_total_ball_to_goal_speed_reward_mean: -0.05082473067671131
    agent_0_total_ball_to_goal_speed_reward_min: -0.7928248020875908
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-57-34
  done: false
  episode_len_mean: 124.23
  episode_media: {}
  episode_reward_max: 0.07186439685722767
  episode_reward_mean: -0.4898335673016848
  episode_reward_min: -2.484311974468998
  episodes_this_iter: 32
  episodes_total: 66966
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2518,200827,10072000,-0.489834,0.0718644,-2.48431,124.23


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40304000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03467000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.20300000000000015
    agent_0_total_ball_to_goal_speed_reward_max: 0.7064167422843373
    agent_0_total_ball_to_goal_speed_reward_mean: -0.021666038523005922
    agent_0_total_ball_to_goal_speed_reward_min: -0.652121011682947
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_14-59-04
  done: false
  episode_len_mean: 123.67
  episode_media: {}
  episode_reward_max: 0.07186439685722767
  episode_reward_mean: -0.48974109606242927
  episode_reward_min: -1.7685464470159207
  episodes_this_iter: 30
  episodes_total: 66996
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2519,200905,10076000,-0.489741,0.0718644,-1.76855,123.67


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40320000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.04109000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.23000000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.8086478555864062
    agent_0_total_ball_to_goal_speed_reward_mean: -0.020462312001486578
    agent_0_total_ball_to_goal_speed_reward_min: -0.685244552926725
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-00-21
  done: false
  episode_len_mean: 133.83
  episode_media: {}
  episode_reward_max: -0.04791453932345924
  episode_reward_mean: -0.5403275509733999
  episode_reward_min: -1.7685464470159207
  episodes_this_iter: 24
  episodes_total: 67020
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2520,200982,10080000,-0.540328,-0.0479145,-1.76855,133.83


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40336000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.04567000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.23000000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.8086478555864062
    agent_0_total_ball_to_goal_speed_reward_mean: 0.005447363184424555
    agent_0_total_ball_to_goal_speed_reward_min: -0.685244552926725
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-01-39
  done: false
  episode_len_mean: 139.11
  episode_media: {}
  episode_reward_max: -0.04791453932345924
  episode_reward_mean: -0.5672403722559235
  episode_reward_min: -1.5897684213741143
  episodes_this_iter: 28
  episodes_total: 67048
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2521,201060,10084000,-0.56724,-0.0479145,-1.58977,139.11


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40352000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.044620000000000035
    agent_0_total_agent_position_to_ball_reward_min: -0.23000000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.8086478555864062
    agent_0_total_ball_to_goal_speed_reward_mean: -0.009963675095178738
    agent_0_total_ball_to_goal_speed_reward_min: -0.6883989923873239
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-03-08
  done: false
  episode_len_mean: 146.62
  episode_media: {}
  episode_reward_max: -0.03985801701131697
  episode_reward_mean: -0.5931051793781091
  episode_reward_min: -1.654828530548842
  episodes_this_iter: 29
  episodes_total: 67077
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2522,201137,10088000,-0.593105,-0.039858,-1.65483,146.62


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40368000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03928000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.23000000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7392353772291149
    agent_0_total_ball_to_goal_speed_reward_mean: -0.012374937269230366
    agent_0_total_ball_to_goal_speed_reward_min: -0.6883989923873239
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-04-26
  done: false
  episode_len_mean: 139.8
  episode_media: {}
  episode_reward_max: -0.03985801701131697
  episode_reward_mean: -0.5578524179454795
  episode_reward_min: -1.654828530548842
  episodes_this_iter: 26
  episodes_total: 67103
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2523,201215,10092000,-0.557852,-0.039858,-1.65483,139.8


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40384000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03606000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.17800000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.7622884845408203
    agent_0_total_ball_to_goal_speed_reward_mean: -0.0022589116368717417
    agent_0_total_ball_to_goal_speed_reward_min: -0.6883989923873239
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-05-44
  done: false
  episode_len_mean: 140.21
  episode_media: {}
  episode_reward_max: -0.0393706594004013
  episode_reward_mean: -0.5488802259420443
  episode_reward_min: -2.2457777151420633
  episodes_this_iter: 33
  episodes_total: 67136
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2524,201293,10096000,-0.54888,-0.0393707,-2.24578,140.21


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40400000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02983000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.16000000000000011
    agent_0_total_ball_to_goal_speed_reward_max: 0.7686795894578305
    agent_0_total_ball_to_goal_speed_reward_mean: 0.021680989352382895
    agent_0_total_ball_to_goal_speed_reward_min: -0.6883989923873239
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-07-13
  done: false
  episode_len_mean: 128.01
  episode_media: {}
  episode_reward_max: 0.07374251073843419
  episode_reward_mean: -0.5078712926069966
  episode_reward_min: -2.2457777151420633
  episodes_this_iter: 37
  episodes_total: 67173
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2525,201370,10100000,-0.507871,0.0737425,-2.24578,128.01


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40416000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02637000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.23600000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7686795894578305
    agent_0_total_ball_to_goal_speed_reward_mean: 0.049683283430819236
    agent_0_total_ball_to_goal_speed_reward_min: -0.6623312035013311
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-08-31
  done: false
  episode_len_mean: 106.5
  episode_media: {}
  episode_reward_max: 0.07374251073843419
  episode_reward_mean: -0.4545878104714688
  episode_reward_min: -1.9046420329629645
  episodes_this_iter: 40
  episodes_total: 67213
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2526,201447,10104000,-0.454588,0.0737425,-1.90464,106.5


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40432000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.024930000000000018
    agent_0_total_agent_position_to_ball_reward_min: -0.23600000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7686795894578305
    agent_0_total_ball_to_goal_speed_reward_mean: 0.10697606764537873
    agent_0_total_ball_to_goal_speed_reward_min: -0.6623312035013311
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-09-48
  done: false
  episode_len_mean: 110.11
  episode_media: {}
  episode_reward_max: 0.04622661648147286
  episode_reward_mean: -0.45834588247556335
  episode_reward_min: -1.5733233221054848
  episodes_this_iter: 33
  episodes_total: 67246
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2527,201525,10108000,-0.458346,0.0462266,-1.57332,110.11


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40448000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.028480000000000016
    agent_0_total_agent_position_to_ball_reward_min: -0.23600000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.6970371252312397
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08749212134091072
    agent_0_total_ball_to_goal_speed_reward_min: -0.6330070672709618
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-11-17
  done: false
  episode_len_mean: 114.73
  episode_media: {}
  episode_reward_max: 0.1436165781122225
  episode_reward_mean: -0.45929217130592564
  episode_reward_min: -1.5733233221054848
  episodes_this_iter: 31
  episodes_total: 67277
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2528,201602,10112000,-0.459292,0.143617,-1.57332,114.73


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40464000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02735000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.16000000000000011
    agent_0_total_ball_to_goal_speed_reward_max: 0.6870430998261668
    agent_0_total_ball_to_goal_speed_reward_mean: 0.047908577557265755
    agent_0_total_ball_to_goal_speed_reward_min: -0.6993572468303935
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-12-35
  done: false
  episode_len_mean: 117.29
  episode_media: {}
  episode_reward_max: 0.1436165781122225
  episode_reward_mean: -0.44469281120701704
  episode_reward_min: -1.5733233221054848
  episodes_this_iter: 38
  episodes_total: 67315
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2529,201680,10116000,-0.444693,0.143617,-1.57332,117.29


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40480000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03063000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2700000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6870430998261668
    agent_0_total_ball_to_goal_speed_reward_mean: -0.010700958380566514
    agent_0_total_ball_to_goal_speed_reward_min: -0.6993572468303935
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-13-53
  done: false
  episode_len_mean: 119.16
  episode_media: {}
  episode_reward_max: 0.1436165781122225
  episode_reward_mean: -0.47081964383527863
  episode_reward_min: -2.2353603211846638
  episodes_this_iter: 34
  episodes_total: 67349
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2530,201757,10120000,-0.47082,0.143617,-2.23536,119.16


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40496000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02704000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2700000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7360035649182691
    agent_0_total_ball_to_goal_speed_reward_mean: -0.03845205198285587
    agent_0_total_ball_to_goal_speed_reward_min: -0.7123455836807744
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-15-22
  done: false
  episode_len_mean: 114.22
  episode_media: {}
  episode_reward_max: 0.08052636777951605
  episode_reward_mean: -0.4714226335372503
  episode_reward_min: -2.2353603211846638
  episodes_this_iter: 32
  episodes_total: 67381
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2531,201835,10124000,-0.471423,0.0805264,-2.23536,114.22


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40512000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02925000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2700000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7360035649182691
    agent_0_total_ball_to_goal_speed_reward_mean: -0.009351462094752686
    agent_0_total_ball_to_goal_speed_reward_min: -0.7198368268851357
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-16-40
  done: false
  episode_len_mean: 119.63
  episode_media: {}
  episode_reward_max: 0.08052636777951605
  episode_reward_mean: -0.5033482724706445
  episode_reward_min: -2.2353603211846638
  episodes_this_iter: 31
  episodes_total: 67412
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2532,201912,10128000,-0.503348,0.0805264,-2.23536,119.63


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40528000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.030710000000000025
    agent_0_total_agent_position_to_ball_reward_min: -0.21500000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.7360035649182691
    agent_0_total_ball_to_goal_speed_reward_mean: -0.03325334231565175
    agent_0_total_ball_to_goal_speed_reward_min: -0.7198368268851357
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-17-58
  done: false
  episode_len_mean: 125.51
  episode_media: {}
  episode_reward_max: 0.08052636777951605
  episode_reward_mean: -0.5168922084351618
  episode_reward_min: -1.5738063250330363
  episodes_this_iter: 31
  episodes_total: 67443
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2533,201990,10132000,-0.516892,0.0805264,-1.57381,125.51


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40544000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.034710000000000026
    agent_0_total_agent_position_to_ball_reward_min: -0.21500000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.6949357124820041
    agent_0_total_ball_to_goal_speed_reward_mean: -0.05739441307657362
    agent_0_total_ball_to_goal_speed_reward_min: -0.7198368268851357
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-19-28
  done: false
  episode_len_mean: 128.08
  episode_media: {}
  episode_reward_max: -0.03483822997232178
  episode_reward_mean: -0.5325627468720281
  episode_reward_min: -1.6510785732240765
  episodes_this_iter: 30
  episodes_total: 67473
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2534,202067,10136000,-0.532563,-0.0348382,-1.65108,128.08


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40560000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.037880000000000025
    agent_0_total_agent_position_to_ball_reward_min: -0.21500000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.6949357124820041
    agent_0_total_ball_to_goal_speed_reward_mean: -0.08236210326730171
    agent_0_total_ball_to_goal_speed_reward_min: -0.7137168497988988
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-20-45
  done: false
  episode_len_mean: 127.82
  episode_media: {}
  episode_reward_max: 0.008705578311156259
  episode_reward_mean: -0.5308877575809577
  episode_reward_min: -1.6510785732240765
  episodes_this_iter: 35
  episodes_total: 67508
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2535,202145,10140000,-0.530888,0.00870558,-1.65108,127.82


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40576000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.029050000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.19000000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.6034462738245409
    agent_0_total_ball_to_goal_speed_reward_mean: -0.08356230911509459
    agent_0_total_ball_to_goal_speed_reward_min: -0.6706716886029352
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-22-03
  done: false
  episode_len_mean: 107.54
  episode_media: {}
  episode_reward_max: 0.008705578311156259
  episode_reward_mean: -0.4617448070667126
  episode_reward_min: -1.6510785732240765
  episodes_this_iter: 38
  episodes_total: 67546
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2536,202223,10144000,-0.461745,0.00870558,-1.65108,107.54


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40592000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03728000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2860000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6034462738245409
    agent_0_total_ball_to_goal_speed_reward_mean: -0.05416295802125468
    agent_0_total_ball_to_goal_speed_reward_min: -0.6706716886029352
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-23-21
  done: false
  episode_len_mean: 111.85
  episode_media: {}
  episode_reward_max: 0.03568754536768859
  episode_reward_mean: -0.47341879811603893
  episode_reward_min: -1.739326016106913
  episodes_this_iter: 32
  episodes_total: 67578
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2537,202300,10148000,-0.473419,0.0356875,-1.73933,111.85


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40608000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.038580000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.3020000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7600304883803279
    agent_0_total_ball_to_goal_speed_reward_mean: -0.020399112719704928
    agent_0_total_ball_to_goal_speed_reward_min: -0.6706716886029352
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-24-50
  done: false
  episode_len_mean: 117.37
  episode_media: {}
  episode_reward_max: 0.03568754536768859
  episode_reward_mean: -0.493926034996968
  episode_reward_min: -2.0506979777917005
  episodes_this_iter: 30
  episodes_total: 67608
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2538,202377,10152000,-0.493926,0.0356875,-2.0507,117.37


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40624000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.043160000000000025
    agent_0_total_agent_position_to_ball_reward_min: -0.3020000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7600304883803279
    agent_0_total_ball_to_goal_speed_reward_mean: -0.0199378860617468
    agent_0_total_ball_to_goal_speed_reward_min: -0.6415157412223105
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-26-08
  done: false
  episode_len_mean: 127.93
  episode_media: {}
  episode_reward_max: 0.03568754536768859
  episode_reward_mean: -0.5197588597817554
  episode_reward_min: -2.0506979777917005
  episodes_this_iter: 34
  episodes_total: 67642
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2539,202455,10156000,-0.519759,0.0356875,-2.0507,127.93


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40640000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03574000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.3020000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7600304883803279
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0035473697758201606
    agent_0_total_ball_to_goal_speed_reward_min: -0.8095084796902062
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-27-25
  done: false
  episode_len_mean: 126.06
  episode_media: {}
  episode_reward_max: -0.014120378018347424
  episode_reward_mean: -0.5165195966928852
  episode_reward_min: -2.0506979777917005
  episodes_this_iter: 34
  episodes_total: 67676
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2540,202532,10160000,-0.51652,-0.0141204,-2.0507,126.06


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40656000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03496000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.23000000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.5983522731227479
    agent_0_total_ball_to_goal_speed_reward_mean: -0.012956660392222864
    agent_0_total_ball_to_goal_speed_reward_min: -0.8095084796902062
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-28-55
  done: false
  episode_len_mean: 124.86
  episode_media: {}
  episode_reward_max: -0.014120378018347424
  episode_reward_mean: -0.5085773633819867
  episode_reward_min: -1.6865654310985594
  episodes_this_iter: 28
  episodes_total: 67704
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2541,202610,10164000,-0.508577,-0.0141204,-1.68657,124.86


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40672000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.033820000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.23000000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.5983522731227479
    agent_0_total_ball_to_goal_speed_reward_mean: 0.00555487654553106
    agent_0_total_ball_to_goal_speed_reward_min: -0.8095084796902062
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-30-13
  done: false
  episode_len_mean: 123.16
  episode_media: {}
  episode_reward_max: -0.014120378018347424
  episode_reward_mean: -0.5170391915478713
  episode_reward_min: -2.009252127143855
  episodes_this_iter: 34
  episodes_total: 67738
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2542,202687,10168000,-0.517039,-0.0141204,-2.00925,123.16


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40688000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03781000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.23000000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.6601280451778934
    agent_0_total_ball_to_goal_speed_reward_mean: -0.004815681923660383
    agent_0_total_ball_to_goal_speed_reward_min: -0.6696315828381206
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-31-30
  done: false
  episode_len_mean: 130.02
  episode_media: {}
  episode_reward_max: -0.014120378018347424
  episode_reward_mean: -0.5439130875910291
  episode_reward_min: -2.009252127143855
  episodes_this_iter: 27
  episodes_total: 67765
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2543,202764,10172000,-0.543913,-0.0141204,-2.00925,130.02


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40704000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.045370000000000035
    agent_0_total_agent_position_to_ball_reward_min: -0.2440000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6601280451778934
    agent_0_total_ball_to_goal_speed_reward_mean: -0.0029134881554759105
    agent_0_total_ball_to_goal_speed_reward_min: -0.6696315828381206
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-32-48
  done: false
  episode_len_mean: 142.18
  episode_media: {}
  episode_reward_max: -0.06748256127284957
  episode_reward_mean: -0.5818422852520522
  episode_reward_min: -2.177052542028356
  episodes_this_iter: 27
  episodes_total: 67792
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2544,202842,10176000,-0.581842,-0.0674826,-2.17705,142.18


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40720000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03900000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.2440000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6601280451778934
    agent_0_total_ball_to_goal_speed_reward_mean: -0.028305432276457262
    agent_0_total_ball_to_goal_speed_reward_min: -0.7415084555793924
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-34-17
  done: false
  episode_len_mean: 125.07
  episode_media: {}
  episode_reward_max: 0.019430224533906193
  episode_reward_mean: -0.49863932173815
  episode_reward_min: -2.177052542028356
  episodes_this_iter: 42
  episodes_total: 67834
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2545,202919,10180000,-0.498639,0.0194302,-2.17705,125.07


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40736000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03671000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.2440000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6136477822274562
    agent_0_total_ball_to_goal_speed_reward_mean: -0.03125555768370782
    agent_0_total_ball_to_goal_speed_reward_min: -0.7435080626674655
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-35-35
  done: false
  episode_len_mean: 121.23
  episode_media: {}
  episode_reward_max: 0.02318832967414508
  episode_reward_mean: -0.5105515412300623
  episode_reward_min: -2.177052542028356
  episodes_this_iter: 28
  episodes_total: 67862
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2546,202997,10184000,-0.510552,0.0231883,-2.17705,121.23


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40752000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02951000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2930000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6756844605337295
    agent_0_total_ball_to_goal_speed_reward_mean: -0.03303407769158256
    agent_0_total_ball_to_goal_speed_reward_min: -0.7435080626674655
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-36-53
  done: false
  episode_len_mean: 110.99
  episode_media: {}
  episode_reward_max: 0.09116335675544285
  episode_reward_mean: -0.48482126650751545
  episode_reward_min: -3.5730479130619184
  episodes_this_iter: 38
  episodes_total: 67900
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2547,203075,10188000,-0.484821,0.0911634,-3.57305,110.99


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40768000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.028760000000000018
    agent_0_total_agent_position_to_ball_reward_min: -0.2930000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6756844605337295
    agent_0_total_ball_to_goal_speed_reward_mean: -0.05059876071431187
    agent_0_total_ball_to_goal_speed_reward_min: -0.753228644976612
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-38-22
  done: false
  episode_len_mean: 108.28
  episode_media: {}
  episode_reward_max: 0.09116335675544285
  episode_reward_mean: -0.4821774879365799
  episode_reward_min: -3.5730479130619184
  episodes_this_iter: 36
  episodes_total: 67936
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2548,203152,10192000,-0.482177,0.0911634,-3.57305,108.28


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40784000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03399000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2930000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7646797692963522
    agent_0_total_ball_to_goal_speed_reward_mean: -0.004819637774328055
    agent_0_total_ball_to_goal_speed_reward_min: -0.753228644976612
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-39-40
  done: false
  episode_len_mean: 113.04
  episode_media: {}
  episode_reward_max: 0.09116335675544285
  episode_reward_mean: -0.4763552672140775
  episode_reward_min: -3.5730479130619184
  episodes_this_iter: 39
  episodes_total: 67975
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2549,203230,10196000,-0.476355,0.0911634,-3.57305,113.04


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40800000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03275000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2640000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7646797692963522
    agent_0_total_ball_to_goal_speed_reward_mean: 0.012823590772102182
    agent_0_total_ball_to_goal_speed_reward_min: -0.753228644976612
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-40-58
  done: false
  episode_len_mean: 109.37
  episode_media: {}
  episode_reward_max: -0.0424905095021606
  episode_reward_mean: -0.4500807724762138
  episode_reward_min: -1.7983837704030892
  episodes_this_iter: 35
  episodes_total: 68010
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2550,203308,10200000,-0.450081,-0.0424905,-1.79838,109.37


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40816000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03619000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.2640000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7646797692963522
    agent_0_total_ball_to_goal_speed_reward_mean: 0.017675274286465762
    agent_0_total_ball_to_goal_speed_reward_min: -0.664654751283798
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-42-27
  done: false
  episode_len_mean: 122.77
  episode_media: {}
  episode_reward_max: 0.2653142803737094
  episode_reward_mean: -0.4760558152575945
  episode_reward_min: -1.7983837704030892
  episodes_this_iter: 24
  episodes_total: 68034
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2551,203385,10204000,-0.476056,0.265314,-1.79838,122.77


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40832000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03531000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.18000000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.6837662817947376
    agent_0_total_ball_to_goal_speed_reward_mean: 0.008352491164221112
    agent_0_total_ball_to_goal_speed_reward_min: -0.6970782730832695
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-43-45
  done: false
  episode_len_mean: 132.46
  episode_media: {}
  episode_reward_max: 0.2653142803737094
  episode_reward_mean: -0.5133983150630618
  episode_reward_min: -1.76870710791549
  episodes_this_iter: 27
  episodes_total: 68061
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2552,203462,10208000,-0.513398,0.265314,-1.76871,132.46


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40848000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03501000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2940000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.663065949414171
    agent_0_total_ball_to_goal_speed_reward_mean: 0.049206209987596686
    agent_0_total_ball_to_goal_speed_reward_min: -0.6970782730832695
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-45-03
  done: false
  episode_len_mean: 141.15
  episode_media: {}
  episode_reward_max: 0.2653142803737094
  episode_reward_mean: -0.5240058109131425
  episode_reward_min: -2.090803677523529
  episodes_this_iter: 31
  episodes_total: 68092
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2553,203540,10212000,-0.524006,0.265314,-2.0908,141.15


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40864000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.032460000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.2940000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7073469752098347
    agent_0_total_ball_to_goal_speed_reward_mean: 0.08735336521133162
    agent_0_total_ball_to_goal_speed_reward_min: -0.6970782730832695
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-46-32
  done: false
  episode_len_mean: 136.35
  episode_media: {}
  episode_reward_max: 0.021091148930552084
  episode_reward_mean: -0.5037889889744893
  episode_reward_min: -2.090803677523529
  episodes_this_iter: 32
  episodes_total: 68124
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2554,203617,10216000,-0.503789,0.0210911,-2.0908,136.35


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40880000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.035890000000000026
    agent_0_total_agent_position_to_ball_reward_min: -0.2940000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7073469752098347
    agent_0_total_ball_to_goal_speed_reward_mean: 0.04470671208099931
    agent_0_total_ball_to_goal_speed_reward_min: -0.6909122493129196
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-47-50
  done: false
  episode_len_mean: 133.54
  episode_media: {}
  episode_reward_max: 0.12772926302480725
  episode_reward_mean: -0.4959527930756195
  episode_reward_min: -2.090803677523529
  episodes_this_iter: 31
  episodes_total: 68155
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2555,203695,10220000,-0.495953,0.127729,-2.0908,133.54


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40896000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.040960000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.2940000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7073469752098347
    agent_0_total_ball_to_goal_speed_reward_mean: -0.01423852932184611
    agent_0_total_ball_to_goal_speed_reward_min: -0.6813979410675058
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-49-07
  done: false
  episode_len_mean: 140.96
  episode_media: {}
  episode_reward_max: 0.12772926302480725
  episode_reward_mean: -0.5153292995972275
  episode_reward_min: -2.454577381792306
  episodes_this_iter: 28
  episodes_total: 68183
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2556,203772,10224000,-0.515329,0.127729,-2.45458,140.96


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40912000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.04090000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.2650000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7320209587094033
    agent_0_total_ball_to_goal_speed_reward_mean: -0.06961577151712572
    agent_0_total_ball_to_goal_speed_reward_min: -0.6813979410675058
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-50-37
  done: false
  episode_len_mean: 134.74
  episode_media: {}
  episode_reward_max: 0.14681643981326697
  episode_reward_mean: -0.517778523525128
  episode_reward_min: -2.454577381792306
  episodes_this_iter: 27
  episodes_total: 68210
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2557,203850,10228000,-0.517779,0.146816,-2.45458,134.74


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40928000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03994000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.2650000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7320209587094033
    agent_0_total_ball_to_goal_speed_reward_mean: -0.07308504406582203
    agent_0_total_ball_to_goal_speed_reward_min: -0.6340329968491969
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-51-55
  done: false
  episode_len_mean: 134.26
  episode_media: {}
  episode_reward_max: 0.14681643981326697
  episode_reward_mean: -0.5269029341697885
  episode_reward_min: -2.454577381792306
  episodes_this_iter: 34
  episodes_total: 68244
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2558,203928,10232000,-0.526903,0.146816,-2.45458,134.26


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40944000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.038610000000000026
    agent_0_total_agent_position_to_ball_reward_min: -0.2650000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7320209587094033
    agent_0_total_ball_to_goal_speed_reward_mean: -0.07944271442346501
    agent_0_total_ball_to_goal_speed_reward_min: -0.6807547083721337
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-53-12
  done: false
  episode_len_mean: 127.31
  episode_media: {}
  episode_reward_max: 0.14681643981326697
  episode_reward_mean: -0.5242270318804401
  episode_reward_min: -1.8141027000386303
  episodes_this_iter: 32
  episodes_total: 68276
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2559,204005,10236000,-0.524227,0.146816,-1.8141,127.31


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40960000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03257000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.19900000000000015
    agent_0_total_ball_to_goal_speed_reward_max: 0.7320209587094033
    agent_0_total_ball_to_goal_speed_reward_mean: -0.07977371218229617
    agent_0_total_ball_to_goal_speed_reward_min: -0.6948691392200939
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-54-42
  done: false
  episode_len_mean: 123.85
  episode_media: {}
  episode_reward_max: 0.020359732161267186
  episode_reward_mean: -0.5135076029140734
  episode_reward_min: -1.4892302469388121
  episodes_this_iter: 28
  episodes_total: 68304
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2560,204083,10240000,-0.513508,0.0203597,-1.48923,123.85


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40976000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.034460000000000025
    agent_0_total_agent_position_to_ball_reward_min: -0.19900000000000015
    agent_0_total_ball_to_goal_speed_reward_max: 0.628257087139157
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0037589433696191433
    agent_0_total_ball_to_goal_speed_reward_min: -0.8070109059864139
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-56-00
  done: false
  episode_len_mean: 135.51
  episode_media: {}
  episode_reward_max: 0.004009483699304095
  episode_reward_mean: -0.5465150177471502
  episode_reward_min: -1.4468299520643373
  episodes_this_iter: 27
  episodes_total: 68331
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2561,204160,10244000,-0.546515,0.00400948,-1.44683,135.51


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 40992000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03297000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.19200000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.6271902907688294
    agent_0_total_ball_to_goal_speed_reward_mean: 0.009777294969677254
    agent_0_total_ball_to_goal_speed_reward_min: -0.8070109059864139
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-57-17
  done: false
  episode_len_mean: 129.76
  episode_media: {}
  episode_reward_max: 0.004009483699304095
  episode_reward_mean: -0.502030449645917
  episode_reward_min: -1.4468299520643373
  episodes_this_iter: 33
  episodes_total: 68364
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2562,204238,10248000,-0.50203,0.00400948,-1.44683,129.76


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41008000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03361000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.19200000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.6271902907688294
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0808064707133627
    agent_0_total_ball_to_goal_speed_reward_min: -0.8070109059864139
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_15-58-47
  done: false
  episode_len_mean: 128.85
  episode_media: {}
  episode_reward_max: 0.03133436343903706
  episode_reward_mean: -0.4952597893622734
  episode_reward_min: -1.4060286911895377
  episodes_this_iter: 34
  episodes_total: 68398
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2563,204315,10252000,-0.49526,0.0313344,-1.40603,128.85


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41024000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.030180000000000016
    agent_0_total_agent_position_to_ball_reward_min: -0.19200000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.6271902907688294
    agent_0_total_ball_to_goal_speed_reward_mean: 0.024612127308098287
    agent_0_total_ball_to_goal_speed_reward_min: -0.7255463705921389
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-00-05
  done: false
  episode_len_mean: 110.49
  episode_media: {}
  episode_reward_max: 0.03133436343903706
  episode_reward_mean: -0.44610582169543006
  episode_reward_min: -1.3882046277598024
  episodes_this_iter: 41
  episodes_total: 68439
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2564,204393,10256000,-0.446106,0.0313344,-1.3882,110.49


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41040000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02927000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.17700000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.6258620032601859
    agent_0_total_ball_to_goal_speed_reward_mean: -0.018391043364642395
    agent_0_total_ball_to_goal_speed_reward_min: -0.7255463705921389
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-01-23
  done: false
  episode_len_mean: 102.72
  episode_media: {}
  episode_reward_max: 0.10637391770854931
  episode_reward_mean: -0.4167950482988621
  episode_reward_min: -1.4164479016041347
  episodes_this_iter: 39
  episodes_total: 68478
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2565,204471,10260000,-0.416795,0.106374,-1.41645,102.72


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41056000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.030960000000000015
    agent_0_total_agent_position_to_ball_reward_min: -0.17700000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.7527905144196856
    agent_0_total_ball_to_goal_speed_reward_mean: -0.08056207659102452
    agent_0_total_ball_to_goal_speed_reward_min: -0.7058955957744191
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-02-40
  done: false
  episode_len_mean: 106.74
  episode_media: {}
  episode_reward_max: 0.10637391770854931
  episode_reward_mean: -0.4150380668866758
  episode_reward_min: -1.4164479016041347
  episodes_this_iter: 31
  episodes_total: 68509
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2566,204548,10264000,-0.415038,0.106374,-1.41645,106.74


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41072000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03249000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.17700000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.8278450378856808
    agent_0_total_ball_to_goal_speed_reward_mean: -0.06620849166810401
    agent_0_total_ball_to_goal_speed_reward_min: -0.7058955957744191
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-04-10
  done: false
  episode_len_mean: 115.41
  episode_media: {}
  episode_reward_max: 0.10637391770854931
  episode_reward_mean: -0.45275130085569
  episode_reward_min: -1.4164479016041347
  episodes_this_iter: 34
  episodes_total: 68543
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2567,204625,10268000,-0.452751,0.106374,-1.41645,115.41


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41088000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.031280000000000016
    agent_0_total_agent_position_to_ball_reward_min: -0.22000000000000017
    agent_0_total_ball_to_goal_speed_reward_max: 0.8278450378856808
    agent_0_total_ball_to_goal_speed_reward_mean: -0.06348191470406742
    agent_0_total_ball_to_goal_speed_reward_min: -0.6569462088170098
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-05-27
  done: false
  episode_len_mean: 114.54
  episode_media: {}
  episode_reward_max: 0.0011887520635474047
  episode_reward_mean: -0.4799061298089475
  episode_reward_min: -2.248444002345953
  episodes_this_iter: 37
  episodes_total: 68580
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2568,204703,10272000,-0.479906,0.00118875,-2.24844,114.54


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41104000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02463000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.22500000000000017
    agent_0_total_ball_to_goal_speed_reward_max: 0.8278450378856808
    agent_0_total_ball_to_goal_speed_reward_mean: -0.0031146177327794444
    agent_0_total_ball_to_goal_speed_reward_min: -0.6795813849393049
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-06-45
  done: false
  episode_len_mean: 106.85
  episode_media: {}
  episode_reward_max: 0.1163559769279483
  episode_reward_mean: -0.45232777429634785
  episode_reward_min: -2.248444002345953
  episodes_this_iter: 39
  episodes_total: 68619
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2569,204781,10276000,-0.452328,0.116356,-2.24844,106.85


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41120000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.026440000000000016
    agent_0_total_agent_position_to_ball_reward_min: -0.22500000000000017
    agent_0_total_ball_to_goal_speed_reward_max: 0.6144706578989778
    agent_0_total_ball_to_goal_speed_reward_mean: -0.04875002575055745
    agent_0_total_ball_to_goal_speed_reward_min: -0.6795813849393049
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-08-15
  done: false
  episode_len_mean: 107.44
  episode_media: {}
  episode_reward_max: 0.1163559769279483
  episode_reward_mean: -0.45559958968273084
  episode_reward_min: -2.248444002345953
  episodes_this_iter: 38
  episodes_total: 68657
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2570,204858,10280000,-0.4556,0.116356,-2.24844,107.44


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41136000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.026810000000000018
    agent_0_total_agent_position_to_ball_reward_min: -0.22500000000000017
    agent_0_total_ball_to_goal_speed_reward_max: 0.6705763395363609
    agent_0_total_ball_to_goal_speed_reward_mean: -0.042818645927427304
    agent_0_total_ball_to_goal_speed_reward_min: -0.6795813849393049
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-09-32
  done: false
  episode_len_mean: 114.3
  episode_media: {}
  episode_reward_max: 0.1163559769279483
  episode_reward_mean: -0.47080832581202253
  episode_reward_min: -1.9417245760244455
  episodes_this_iter: 28
  episodes_total: 68685
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2571,204936,10284000,-0.470808,0.116356,-1.94172,114.3


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41152000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.029820000000000027
    agent_0_total_agent_position_to_ball_reward_min: -0.23600000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7571894860585887
    agent_0_total_ball_to_goal_speed_reward_mean: -0.11433414246153939
    agent_0_total_ball_to_goal_speed_reward_min: -0.6859834890919654
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-10-50
  done: false
  episode_len_mean: 120.86
  episode_media: {}
  episode_reward_max: 0.00020568062073467175
  episode_reward_mean: -0.4955193766717541
  episode_reward_min: -1.9417245760244455
  episodes_this_iter: 35
  episodes_total: 68720
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2572,205013,10288000,-0.495519,0.000205681,-1.94172,120.86


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41168000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03268000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.23600000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7571894860585887
    agent_0_total_ball_to_goal_speed_reward_mean: -0.0946174597813206
    agent_0_total_ball_to_goal_speed_reward_min: -0.7086124293090843
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-12-08
  done: false
  episode_len_mean: 130.12
  episode_media: {}
  episode_reward_max: 0.00020568062073467175
  episode_reward_mean: -0.5243570376291299
  episode_reward_min: -1.9417245760244455
  episodes_this_iter: 30
  episodes_total: 68750
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2573,205091,10292000,-0.524357,0.000205681,-1.94172,130.12


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41184000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.034410000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.23600000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7571894860585887
    agent_0_total_ball_to_goal_speed_reward_mean: -0.09230023655432808
    agent_0_total_ball_to_goal_speed_reward_min: -0.7086124293090843
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-13-42
  done: false
  episode_len_mean: 133.6
  episode_media: {}
  episode_reward_max: 0.00020568062073467175
  episode_reward_mean: -0.5441628814563679
  episode_reward_min: -1.8687809638993234
  episodes_this_iter: 27
  episodes_total: 68777
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2574,205172,10296000,-0.544163,0.000205681,-1.86878,133.6


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41200000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.032660000000000015
    agent_0_total_agent_position_to_ball_reward_min: -0.20000000000000015
    agent_0_total_ball_to_goal_speed_reward_max: 0.7558064879928886
    agent_0_total_ball_to_goal_speed_reward_mean: -0.10420374400893298
    agent_0_total_ball_to_goal_speed_reward_min: -0.7086124293090843
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-15-00
  done: false
  episode_len_mean: 127.03
  episode_media: {}
  episode_reward_max: -0.04993079552781454
  episode_reward_mean: -0.518415157168958
  episode_reward_min: -1.852122505152362
  episodes_this_iter: 35
  episodes_total: 68812
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2575,205250,10300000,-0.518415,-0.0499308,-1.85212,127.03


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41216000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.029580000000000023
    agent_0_total_agent_position_to_ball_reward_min: -0.16500000000000012
    agent_0_total_ball_to_goal_speed_reward_max: 0.7558064879928886
    agent_0_total_ball_to_goal_speed_reward_mean: -0.10393180771581674
    agent_0_total_ball_to_goal_speed_reward_min: -0.6770101541029636
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-16-17
  done: false
  episode_len_mean: 118.7
  episode_media: {}
  episode_reward_max: -0.04993079552781454
  episode_reward_mean: -0.49885221794136875
  episode_reward_min: -1.7609101793310615
  episodes_this_iter: 36
  episodes_total: 68848
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2576,205328,10304000,-0.498852,-0.0499308,-1.76091,118.7


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41232000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02829000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.18800000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.7558064879928886
    agent_0_total_ball_to_goal_speed_reward_mean: -0.07648219058684275
    agent_0_total_ball_to_goal_speed_reward_min: -0.8274390482115571
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-17-47
  done: false
  episode_len_mean: 114.16
  episode_media: {}
  episode_reward_max: -0.0217360045846251
  episode_reward_mean: -0.4841446630289461
  episode_reward_min: -1.5855652586820277
  episodes_this_iter: 32
  episodes_total: 68880
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2577,205405,10308000,-0.484145,-0.021736,-1.58557,114.16


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41248000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03207000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2610000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.5772458591507919
    agent_0_total_ball_to_goal_speed_reward_mean: -0.06166313313184266
    agent_0_total_ball_to_goal_speed_reward_min: -0.8274390482115571
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-19-05
  done: false
  episode_len_mean: 114.86
  episode_media: {}
  episode_reward_max: 0.15365926658214368
  episode_reward_mean: -0.5041489541221182
  episode_reward_min: -2.033467241708707
  episodes_this_iter: 34
  episodes_total: 68914
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2578,205483,10312000,-0.504149,0.153659,-2.03347,114.86


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41264000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03524000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2750000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6956866018105003
    agent_0_total_ball_to_goal_speed_reward_mean: -0.008302390183496811
    agent_0_total_ball_to_goal_speed_reward_min: -0.8274390482115571
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-20-23
  done: false
  episode_len_mean: 124.57
  episode_media: {}
  episode_reward_max: 0.15365926658214368
  episode_reward_mean: -0.5216077963024651
  episode_reward_min: -2.033467241708707
  episodes_this_iter: 32
  episodes_total: 68946
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2579,205561,10316000,-0.521608,0.153659,-2.03347,124.57


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41280000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03812000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2750000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6956866018105003
    agent_0_total_ball_to_goal_speed_reward_mean: -0.02523539262970858
    agent_0_total_ball_to_goal_speed_reward_min: -0.6973136465448438
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-21-53
  done: false
  episode_len_mean: 131.32
  episode_media: {}
  episode_reward_max: 0.15365926658214368
  episode_reward_mean: -0.5396512664031407
  episode_reward_min: -2.033467241708707
  episodes_this_iter: 30
  episodes_total: 68976
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2580,205638,10320000,-0.539651,0.153659,-2.03347,131.32


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41296000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.032690000000000025
    agent_0_total_agent_position_to_ball_reward_min: -0.2750000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6956866018105003
    agent_0_total_ball_to_goal_speed_reward_mean: 0.022712130824112924
    agent_0_total_ball_to_goal_speed_reward_min: -0.6993531628689216
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-23-10
  done: false
  episode_len_mean: 131.65
  episode_media: {}
  episode_reward_max: 0.1366136837852272
  episode_reward_mean: -0.5209101885306626
  episode_reward_min: -1.9759032437584745
  episodes_this_iter: 33
  episodes_total: 69009
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2581,205716,10324000,-0.52091,0.136614,-1.9759,131.65


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41312000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.027860000000000017
    agent_0_total_agent_position_to_ball_reward_min: -0.20000000000000015
    agent_0_total_ball_to_goal_speed_reward_max: 0.6499648706403196
    agent_0_total_ball_to_goal_speed_reward_mean: 0.007648230753875983
    agent_0_total_ball_to_goal_speed_reward_min: -0.8887509837320812
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-24-28
  done: false
  episode_len_mean: 119.7
  episode_media: {}
  episode_reward_max: 0.1366136837852272
  episode_reward_mean: -0.48447997789435676
  episode_reward_min: -1.8419701976467837
  episodes_this_iter: 32
  episodes_total: 69041
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2582,205793,10328000,-0.48448,0.136614,-1.84197,119.7


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41328000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.032650000000000026
    agent_0_total_agent_position_to_ball_reward_min: -0.32200000000000023
    agent_0_total_ball_to_goal_speed_reward_max: 0.6182953978846084
    agent_0_total_ball_to_goal_speed_reward_mean: -0.007689305472830683
    agent_0_total_ball_to_goal_speed_reward_min: -0.8887509837320812
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-25-45
  done: false
  episode_len_mean: 125.94
  episode_media: {}
  episode_reward_max: 0.01712034023945641
  episode_reward_mean: -0.5122303094842673
  episode_reward_min: -2.400761935417463
  episodes_this_iter: 32
  episodes_total: 69073
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2583,205870,10332000,-0.51223,0.0171203,-2.40076,125.94


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41344000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03494000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.32200000000000023
    agent_0_total_ball_to_goal_speed_reward_max: 0.5921439912153563
    agent_0_total_ball_to_goal_speed_reward_mean: -0.024879158687836835
    agent_0_total_ball_to_goal_speed_reward_min: -0.8887509837320812
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-27-16
  done: false
  episode_len_mean: 117.17
  episode_media: {}
  episode_reward_max: 0.1610263846175286
  episode_reward_mean: -0.48146502827738785
  episode_reward_min: -2.400761935417463
  episodes_this_iter: 39
  episodes_total: 69112
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2584,205948,10336000,-0.481465,0.161026,-2.40076,117.17


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41360000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.032710000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.32200000000000023
    agent_0_total_ball_to_goal_speed_reward_max: 0.580133612421011
    agent_0_total_ball_to_goal_speed_reward_mean: -0.04910233211952035
    agent_0_total_ball_to_goal_speed_reward_min: -0.7343326517436584
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-28-33
  done: false
  episode_len_mean: 109.8
  episode_media: {}
  episode_reward_max: 0.1610263846175286
  episode_reward_mean: -0.46279084377592405
  episode_reward_min: -2.400761935417463
  episodes_this_iter: 40
  episodes_total: 69152
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2585,206025,10340000,-0.462791,0.161026,-2.40076,109.8


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41376000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02269000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.23800000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.6335604927295831
    agent_0_total_ball_to_goal_speed_reward_mean: 0.015625377020285543
    agent_0_total_ball_to_goal_speed_reward_min: -0.7343326517436584
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-29-51
  done: false
  episode_len_mean: 93.85
  episode_media: {}
  episode_reward_max: 0.1610263846175286
  episode_reward_mean: -0.417113897782988
  episode_reward_min: -1.5579068431670595
  episodes_this_iter: 43
  episodes_total: 69195
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2586,206103,10344000,-0.417114,0.161026,-1.55791,93.85


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41392000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.027530000000000013
    agent_0_total_agent_position_to_ball_reward_min: -0.23800000000000018
    agent_0_total_ball_to_goal_speed_reward_max: 0.7284563968401802
    agent_0_total_ball_to_goal_speed_reward_mean: 0.024644728248441005
    agent_0_total_ball_to_goal_speed_reward_min: -0.6416389316583455
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-31-08
  done: false
  episode_len_mean: 103.41
  episode_media: {}
  episode_reward_max: 0.030303320109465126
  episode_reward_mean: -0.43714425075605307
  episode_reward_min: -1.6388843327082605
  episodes_this_iter: 34
  episodes_total: 69229
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2587,206180,10348000,-0.437144,0.0303033,-1.63888,103.41


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41408000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.027940000000000017
    agent_0_total_agent_position_to_ball_reward_min: -0.2870000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7284563968401802
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03100718164626851
    agent_0_total_ball_to_goal_speed_reward_min: -0.6547192559414274
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-32-39
  done: false
  episode_len_mean: 102.27
  episode_media: {}
  episode_reward_max: 0.13320130009450448
  episode_reward_mean: -0.4299049817906265
  episode_reward_min: -1.6388843327082605
  episodes_this_iter: 42
  episodes_total: 69271
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2588,206258,10352000,-0.429905,0.133201,-1.63888,102.27


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41424000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03155000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2870000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7284563968401802
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06155818086409874
    agent_0_total_ball_to_goal_speed_reward_min: -0.7169820188724193
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-33-56
  done: false
  episode_len_mean: 114.85
  episode_media: {}
  episode_reward_max: 0.13320130009450448
  episode_reward_mean: -0.46431775711321455
  episode_reward_min: -1.6388843327082605
  episodes_this_iter: 25
  episodes_total: 69296
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2589,206335,10356000,-0.464318,0.133201,-1.63888,114.85


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41440000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03212000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.2870000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.6715292351208841
    agent_0_total_ball_to_goal_speed_reward_mean: -0.019063271119834156
    agent_0_total_ball_to_goal_speed_reward_min: -0.7169820188724193
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-35-14
  done: false
  episode_len_mean: 113.64
  episode_media: {}
  episode_reward_max: 0.016071717325893076
  episode_reward_mean: -0.4789613144342389
  episode_reward_min: -2.2221571860461973
  episodes_this_iter: 39
  episodes_total: 69335
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2590,206413,10360000,-0.478961,0.0160717,-2.22216,113.64


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41456000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03994000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.3100000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7090023921249765
    agent_0_total_ball_to_goal_speed_reward_mean: 0.03415681972591207
    agent_0_total_ball_to_goal_speed_reward_min: -0.7169820188724193
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-36-31
  done: false
  episode_len_mean: 132.25
  episode_media: {}
  episode_reward_max: 0.016071717325893076
  episode_reward_mean: -0.5412719246976727
  episode_reward_min: -2.2221571860461973
  episodes_this_iter: 22
  episodes_total: 69357
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2591,206490,10364000,-0.541272,0.0160717,-2.22216,132.25


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41472000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03366000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.3100000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7090023921249765
    agent_0_total_ball_to_goal_speed_reward_mean: -0.021954487590471174
    agent_0_total_ball_to_goal_speed_reward_min: -0.7010543122153214
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-38-01
  done: false
  episode_len_mean: 124.07
  episode_media: {}
  episode_reward_max: 0.08543672745934627
  episode_reward_mean: -0.5083587659818491
  episode_reward_min: -2.2221571860461973
  episodes_this_iter: 39
  episodes_total: 69396
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2592,206567,10368000,-0.508359,0.0854367,-2.22216,124.07


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41488000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03724000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.3100000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7090023921249765
    agent_0_total_ball_to_goal_speed_reward_mean: -0.023820562250733816
    agent_0_total_ball_to_goal_speed_reward_min: -0.6273681583119959
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-39-18
  done: false
  episode_len_mean: 128.44
  episode_media: {}
  episode_reward_max: 0.08543672745934627
  episode_reward_mean: -0.502164641753458
  episode_reward_min: -1.975553440988025
  episodes_this_iter: 30
  episodes_total: 69426
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2593,206645,10372000,-0.502165,0.0854367,-1.97555,128.44


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41504000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03780000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.2740000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7287822883741338
    agent_0_total_ball_to_goal_speed_reward_mean: -0.036713587551963925
    agent_0_total_ball_to_goal_speed_reward_min: -0.65887377607766
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-40-36
  done: false
  episode_len_mean: 130.55
  episode_media: {}
  episode_reward_max: 0.08543672745934627
  episode_reward_mean: -0.5174232215050908
  episode_reward_min: -1.975553440988025
  episodes_this_iter: 22
  episodes_total: 69448
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2594,206722,10376000,-0.517423,0.0854367,-1.97555,130.55


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41520000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03673000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2740000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7287822883741338
    agent_0_total_ball_to_goal_speed_reward_mean: -0.05544071974459553
    agent_0_total_ball_to_goal_speed_reward_min: -0.827942313859847
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-42-05
  done: false
  episode_len_mean: 133.06
  episode_media: {}
  episode_reward_max: 0.09258268899201161
  episode_reward_mean: -0.5120792123268783
  episode_reward_min: -2.842807680759061
  episodes_this_iter: 38
  episodes_total: 69486
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
     

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2595,206799,10380000,-0.512079,0.0925827,-2.84281,133.06


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41536000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03904000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.2740000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.8291923107623477
    agent_0_total_ball_to_goal_speed_reward_mean: -0.08230940999221589
    agent_0_total_ball_to_goal_speed_reward_min: -0.827942313859847
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-43-23
  done: false
  episode_len_mean: 134.01
  episode_media: {}
  episode_reward_max: 0.1410696750272602
  episode_reward_mean: -0.515557852593984
  episode_reward_min: -2.842807680759061
  episodes_this_iter: 32
  episodes_total: 69518
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
       

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2596,206877,10384000,-0.515558,0.14107,-2.84281,134.01


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41552000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.028180000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.2660000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.8291923107623477
    agent_0_total_ball_to_goal_speed_reward_mean: 0.033175760897093384
    agent_0_total_ball_to_goal_speed_reward_min: -0.6762244963782401
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-44-41
  done: false
  episode_len_mean: 106.9
  episode_media: {}
  episode_reward_max: 0.1410696750272602
  episode_reward_mean: -0.4133357210075394
  episode_reward_min: -2.2433929996382265
  episodes_this_iter: 41
  episodes_total: 69559
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2597,206955,10388000,-0.413336,0.14107,-2.24339,106.9


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41568000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.025880000000000025
    agent_0_total_agent_position_to_ball_reward_min: -0.2660000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.8291923107623477
    agent_0_total_ball_to_goal_speed_reward_mean: 0.06084339509989796
    agent_0_total_ball_to_goal_speed_reward_min: -0.7089848968198156
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-45-59
  done: false
  episode_len_mean: 110.04
  episode_media: {}
  episode_reward_max: 0.1410696750272602
  episode_reward_mean: -0.44227968162344816
  episode_reward_min: -2.2433929996382265
  episodes_this_iter: 34
  episodes_total: 69593
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2598,207032,10392000,-0.44228,0.14107,-2.24339,110.04


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41584000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.02844000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.17500000000000013
    agent_0_total_ball_to_goal_speed_reward_max: 0.6098003054464456
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07961472657801573
    agent_0_total_ball_to_goal_speed_reward_min: -0.7089848968198156
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-47-28
  done: false
  episode_len_mean: 114.79
  episode_media: {}
  episode_reward_max: 0.132235417267334
  episode_reward_mean: -0.472328768393695
  episode_reward_min: -1.5959115688519052
  episodes_this_iter: 28
  episodes_total: 69621
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
      

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2599,207109,10396000,-0.472329,0.132235,-1.59591,114.79


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41600000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03453000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.3810000000000003
    agent_0_total_ball_to_goal_speed_reward_max: 0.6098003054464456
    agent_0_total_ball_to_goal_speed_reward_mean: 0.0405679354176991
    agent_0_total_ball_to_goal_speed_reward_min: -0.7089848968198156
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-48-46
  done: false
  episode_len_mean: 128.34
  episode_media: {}
  episode_reward_max: 0.132235417267334
  episode_reward_mean: -0.5260745930824493
  episode_reward_min: -2.099393039784925
  episodes_this_iter: 24
  episodes_total: 69645
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2600,207187,10400000,-0.526075,0.132235,-2.09939,128.34


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41616000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.042840000000000024
    agent_0_total_agent_position_to_ball_reward_min: -0.3810000000000003
    agent_0_total_ball_to_goal_speed_reward_max: 0.6755013400760613
    agent_0_total_ball_to_goal_speed_reward_mean: -0.030762652336208892
    agent_0_total_ball_to_goal_speed_reward_min: -0.8499868936746766
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-50-03
  done: false
  episode_len_mean: 136.27
  episode_media: {}
  episode_reward_max: 0.06531239955631385
  episode_reward_mean: -0.5699163113301231
  episode_reward_min: -3.273787704683513
  episodes_this_iter: 33
  episodes_total: 69678
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2601,207264,10404000,-0.569916,0.0653124,-3.27379,136.27


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41632000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.04311000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.3810000000000003
    agent_0_total_ball_to_goal_speed_reward_max: 0.7526320962514916
    agent_0_total_ball_to_goal_speed_reward_mean: -0.07679844920814378
    agent_0_total_ball_to_goal_speed_reward_min: -0.8499868936746766
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-51-21
  done: false
  episode_len_mean: 137.34
  episode_media: {}
  episode_reward_max: 0.07392594260920582
  episode_reward_mean: -0.5492852532427763
  episode_reward_min: -3.273787704683513
  episodes_this_iter: 28
  episodes_total: 69706
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2602,207342,10408000,-0.549285,0.0739259,-3.27379,137.34


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41648000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.041090000000000015
    agent_0_total_agent_position_to_ball_reward_min: -0.2920000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7526320962514916
    agent_0_total_ball_to_goal_speed_reward_mean: -0.12094303869106174
    agent_0_total_ball_to_goal_speed_reward_min: -0.8499868936746766
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-52-51
  done: false
  episode_len_mean: 136.55
  episode_media: {}
  episode_reward_max: 0.07392594260920582
  episode_reward_mean: -0.5411274186822905
  episode_reward_min: -3.273787704683513
  episodes_this_iter: 28
  episodes_total: 69734
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2603,207419,10412000,-0.541127,0.0739259,-3.27379,136.55


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41664000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03763000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.2920000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.7526320962514916
    agent_0_total_ball_to_goal_speed_reward_mean: -0.062229926383431104
    agent_0_total_ball_to_goal_speed_reward_min: -0.6954560450306507
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-54-09
  done: false
  episode_len_mean: 134.29
  episode_media: {}
  episode_reward_max: 0.07392594260920582
  episode_reward_mean: -0.5332527361545217
  episode_reward_min: -2.771519311041403
  episodes_this_iter: 29
  episodes_total: 69763
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2604,207497,10416000,-0.533253,0.0739259,-2.77152,134.29


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41680000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03630000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2920000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.5990315247033329
    agent_0_total_ball_to_goal_speed_reward_mean: -0.022523007218630466
    agent_0_total_ball_to_goal_speed_reward_min: -0.6954560450306507
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-55-26
  done: false
  episode_len_mean: 134.57
  episode_media: {}
  episode_reward_max: 0.13827778586810302
  episode_reward_mean: -0.5248108950902227
  episode_reward_min: -2.771519311041403
  episodes_this_iter: 34
  episodes_total: 69797
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2605,207575,10420000,-0.524811,0.138278,-2.77152,134.57


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41696000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03505000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.2750000000000002
    agent_0_total_ball_to_goal_speed_reward_max: 0.5990315247033329
    agent_0_total_ball_to_goal_speed_reward_mean: -0.06943796473288658
    agent_0_total_ball_to_goal_speed_reward_min: -0.7544232127183711
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-56-56
  done: false
  episode_len_mean: 133.56
  episode_media: {}
  episode_reward_max: 0.13827778586810302
  episode_reward_mean: -0.5378609147301053
  episode_reward_min: -1.9294352197422955
  episodes_this_iter: 25
  episodes_total: 69822
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2606,207652,10424000,-0.537861,0.138278,-1.92944,133.56


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41712000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03610000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.21000000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.7026305253025955
    agent_0_total_ball_to_goal_speed_reward_mean: -0.07289109594849895
    agent_0_total_ball_to_goal_speed_reward_min: -0.7544232127183711
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-58-15
  done: false
  episode_len_mean: 135.9
  episode_media: {}
  episode_reward_max: 0.13827778586810302
  episode_reward_mean: -0.535837300081303
  episode_reward_min: -1.9294352197422955
  episodes_this_iter: 30
  episodes_total: 69852
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
    

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2607,207731,10428000,-0.535837,0.138278,-1.92944,135.9


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41728000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03329000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.21000000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.7026305253025955
    agent_0_total_ball_to_goal_speed_reward_mean: -0.06300567833423709
    agent_0_total_ball_to_goal_speed_reward_min: -0.7544232127183711
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_16-59-40
  done: false
  episode_len_mean: 127.2
  episode_media: {}
  episode_reward_max: 0.08079145363947227
  episode_reward_mean: -0.5084356158382842
  episode_reward_min: -1.9016190505056274
  episodes_this_iter: 38
  episodes_total: 69890
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2608,207817,10432000,-0.508436,0.0807915,-1.90162,127.2


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41744000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03051000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.20700000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.8589570972208173
    agent_0_total_ball_to_goal_speed_reward_mean: -0.05652101325900091
    agent_0_total_ball_to_goal_speed_reward_min: -0.7254137422478788
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_17-01-19
  done: false
  episode_len_mean: 111.64
  episode_media: {}
  episode_reward_max: 0.08079145363947227
  episode_reward_mean: -0.44073669678421196
  episode_reward_min: -1.1700385908059228
  episodes_this_iter: 36
  episodes_total: 69926
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2609,207902,10436000,-0.440737,0.0807915,-1.17004,111.64


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41760000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.030560000000000018
    agent_0_total_agent_position_to_ball_reward_min: -0.20700000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.8589570972208173
    agent_0_total_ball_to_goal_speed_reward_mean: 0.011613351435426815
    agent_0_total_ball_to_goal_speed_reward_min: -0.7254137422478788
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_17-02-42
  done: false
  episode_len_mean: 112.3
  episode_media: {}
  episode_reward_max: 0.11755307410085969
  episode_reward_mean: -0.44017811168731547
  episode_reward_min: -1.5916696579924214
  episodes_this_iter: 34
  episodes_total: 69960
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2610,207984,10440000,-0.440178,0.117553,-1.59167,112.3


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41776000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.030950000000000023
    agent_0_total_agent_position_to_ball_reward_min: -0.18400000000000014
    agent_0_total_ball_to_goal_speed_reward_max: 0.8589570972208173
    agent_0_total_ball_to_goal_speed_reward_mean: 0.007070118019116629
    agent_0_total_ball_to_goal_speed_reward_min: -0.7254137422478788
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_17-04-02
  done: false
  episode_len_mean: 110.95
  episode_media: {}
  episode_reward_max: 0.11755307410085969
  episode_reward_mean: -0.43180148561668547
  episode_reward_min: -1.5916696579924214
  episodes_this_iter: 37
  episodes_total: 69997
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2611,208065,10444000,-0.431801,0.117553,-1.59167,110.95


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41792000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.036840000000000026
    agent_0_total_agent_position_to_ball_reward_min: -0.36300000000000027
    agent_0_total_ball_to_goal_speed_reward_max: 0.7692157193081476
    agent_0_total_ball_to_goal_speed_reward_mean: 0.07398761631106038
    agent_0_total_ball_to_goal_speed_reward_min: -0.5952652364035179
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_17-05-23
  done: false
  episode_len_mean: 123.29
  episode_media: {}
  episode_reward_max: 0.11755307410085969
  episode_reward_mean: -0.48231809390249153
  episode_reward_min: -1.8761764908501244
  episodes_this_iter: 28
  episodes_total: 70025
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2612,208145,10448000,-0.482318,0.117553,-1.87618,123.29


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41808000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03980000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.36300000000000027
    agent_0_total_ball_to_goal_speed_reward_max: 0.7692157193081476
    agent_0_total_ball_to_goal_speed_reward_mean: -0.05679941039452662
    agent_0_total_ball_to_goal_speed_reward_min: -0.7609068944421405
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_17-06-55
  done: false
  episode_len_mean: 126.95
  episode_media: {}
  episode_reward_max: 0.07692681234649301
  episode_reward_mean: -0.5187525298093809
  episode_reward_min: -1.8761764908501244
  episodes_this_iter: 30
  episodes_total: 70055
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2613,208225,10452000,-0.518753,0.0769268,-1.87618,126.95


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41824000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03963000000000003
    agent_0_total_agent_position_to_ball_reward_min: -0.36300000000000027
    agent_0_total_ball_to_goal_speed_reward_max: 0.6858241851528817
    agent_0_total_ball_to_goal_speed_reward_mean: -0.04900595885371163
    agent_0_total_ball_to_goal_speed_reward_min: -0.7705691104558529
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_17-08-16
  done: false
  episode_len_mean: 121.72
  episode_media: {}
  episode_reward_max: 0.23252741518076636
  episode_reward_mean: -0.5287717411364778
  episode_reward_min: -1.8761764908501244
  episodes_this_iter: 41
  episodes_total: 70096
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2614,208305,10456000,-0.528772,0.232527,-1.87618,121.72


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41840000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.03225000000000002
    agent_0_total_agent_position_to_ball_reward_min: -0.19900000000000015
    agent_0_total_ball_to_goal_speed_reward_max: 0.627181971874151
    agent_0_total_ball_to_goal_speed_reward_mean: -0.043916652142175557
    agent_0_total_ball_to_goal_speed_reward_min: -0.7705691104558529
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_17-09-36
  done: false
  episode_len_mean: 110.09
  episode_media: {}
  episode_reward_max: 0.23252741518076636
  episode_reward_mean: -0.49666125769361097
  episode_reward_min: -1.7550897804917425
  episodes_this_iter: 35
  episodes_total: 70131
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2615,208385,10460000,-0.496661,0.232527,-1.75509,110.09


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41856000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.029020000000000018
    agent_0_total_agent_position_to_ball_reward_min: -0.20600000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.7648601077499572
    agent_0_total_ball_to_goal_speed_reward_mean: 0.02744403548194295
    agent_0_total_ball_to_goal_speed_reward_min: -0.7705691104558529
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_17-11-10
  done: false
  episode_len_mean: 108.15
  episode_media: {}
  episode_reward_max: 0.23252741518076636
  episode_reward_mean: -0.467313664185941
  episode_reward_min: -1.5677932639272845
  episodes_this_iter: 29
  episodes_total: 70160
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2616,208465,10464000,-0.467314,0.232527,-1.56779,108.15


Result for PPO_Soccer_491c2_00000:
  agent_timesteps_total: 41872000
  custom_metrics:
    agent_0_total_agent_position_to_ball_reward_max: 0.0
    agent_0_total_agent_position_to_ball_reward_mean: -0.028220000000000012
    agent_0_total_agent_position_to_ball_reward_min: -0.20600000000000016
    agent_0_total_ball_to_goal_speed_reward_max: 0.7648601077499572
    agent_0_total_ball_to_goal_speed_reward_mean: 0.022743142413394354
    agent_0_total_ball_to_goal_speed_reward_min: -0.7382406555875302
    agent_0_total_env_reward_max: 0.0
    agent_0_total_env_reward_mean: 0.0
    agent_0_total_env_reward_min: 0.0
  date: 2021-12-03_17-12-30
  done: false
  episode_len_mean: 114.31
  episode_media: {}
  episode_reward_max: 0.1217560268613278
  episode_reward_mean: -0.4917774515933698
  episode_reward_min: -1.5677932639272845
  episodes_this_iter: 34
  episodes_total: 70194
  experiment_id: 51c83eec84bd4de89e8eaf92d7325d10
  hostname: bruno-odyssey-mint
  info:
    learner:
      default:
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_Soccer_491c2_00000,RUNNING,192.168.0.108:95010,2617,208545,10468000,-0.491777,0.121756,-1.56779,114.31


## Export agent

In [3]:
# The *string* "__file__" (not the variable) is resolved against the current
# working directory; keeping only the directory part therefore yields the
# resolved working directory itself.
this_path = os.path.split(os.path.realpath("__file__"))[0]
print("this_path", this_path)


def export_agent(agent_file: str, TRIAL, agent_name="my_ray_soccer_agent", makeZip=False):
    """Build an importable agent package under ``<this_path>/agents/<agent_name>``.

    The package directory is recreated from scratch and filled with
    ``agent.py`` (the text in ``agent_file``), an ``__init__.py`` that
    re-exports ``MyRaySoccerAgent``, and a full copy of the trial directory,
    stored at the trial's path relative to ``ray_results/``.

    Args:
        agent_file: Source code written verbatim to ``agent.py``.
        TRIAL: Path of the trial directory; it must contain ``ray_results/``
            followed by at least one more path component.
        agent_name: Name of the package directory and of the optional archive.
        makeZip: When true, additionally write
            ``<this_path>/agents/<agent_name>.zip`` containing the package
            directory.

    Raises:
        ValueError: If ``TRIAL`` has nothing after ``ray_results/``.
    """
    # Validate before touching the disk, so a bad trial path cannot wipe a
    # previous export and then fail half-way through.
    trial_rel = TRIAL.partition("ray_results/")[2]
    if not trial_rel:
        raise ValueError(
            f"TRIAL must contain 'ray_results/' followed by a path: {TRIAL!r}"
        )

    agents_root = os.path.join(this_path, "agents")
    agent_path = os.path.join(agents_root, agent_name)

    # start from a clean package directory
    if os.path.isdir(agent_path):
        shutil.rmtree(agent_path)
    os.makedirs(agent_path)

    # save the agent class
    with open(os.path.join(agent_path, "agent.py"), "w", encoding="utf-8") as f:
        f.write(agent_file)

    # save an __init__ so the directory is a Python module
    with open(os.path.join(agent_path, "__init__.py"), "w", encoding="utf-8") as f:
        f.write("from .agent import MyRaySoccerAgent")

    # copy the whole trial, including the experiment configuration files
    print(f"TRIALLL {TRIAL}")
    shutil.copytree(TRIAL, os.path.join(agent_path, trial_rel))

    # pack everything into a .zip file
    if makeZip:
        # The archive is written next to the package directory rather than
        # inside it, so it never ends up containing itself. base_dir keeps
        # the package folder as the top-level entry of the archive.
        shutil.make_archive(
            os.path.join(agents_root, agent_name),
            "zip",
            root_dir=agents_root,
            base_dir=agent_name,
        )


def get_agent_file_str(ALGORITHM, CHECKPOINT, POLICY_NAME="default"):
    """Return the source code of an ``agent.py`` that loads a trained policy.

    The generated module defines ``MyRaySoccerAgent``, which restores the
    trainer named ``ALGORITHM`` from a checkpoint located relative to the
    generated file itself, and answers ``act`` calls with the policy
    ``POLICY_NAME``.

    Args:
        ALGORITHM: Trainable name embedded in the generated module.
        CHECKPOINT: Checkpoint path; only the part after ``ray_results/`` is
            embedded, so the generated agent does not depend on the absolute
            location of the training run.
        POLICY_NAME: Policy identifier embedded in the generated module.

    Returns:
        The complete text of the generated ``agent.py``.

    Raises:
        ValueError: If ``CHECKPOINT`` has nothing after ``ray_results/``.
    """
    checkpoint_rel = CHECKPOINT.partition("ray_results/")[2]
    if not checkpoint_rel:
        raise ValueError(
            "CHECKPOINT must contain 'ray_results/' followed by a path: "
            f"{CHECKPOINT!r}"
        )

    # The three values are embedded with !r so they become valid Python
    # literals even when they contain quotes or backslashes.
    return f"""
import pickle
import os
from typing import Dict

import gym
import numpy as np
import ray
from ray import tune
from ray.rllib.env.base_env import BaseEnv
from ray.tune.registry import get_trainable_cls

from soccer_twos import AgentInterface

ALGORITHM = {ALGORITHM!r}
CHECKPOINT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 
    {checkpoint_rel!r}
)
POLICY_NAME = {POLICY_NAME!r}


class MyRaySoccerAgent(AgentInterface):
    def __init__(self, env: gym.Env):
        super().__init__()
        ray.init(ignore_reinit_error=True)

        # Load configuration from checkpoint file.
        config_path = ""
        if CHECKPOINT_PATH:
            config_dir = os.path.dirname(CHECKPOINT_PATH)
            config_path = os.path.join(config_dir, "params.pkl")
            # Try parent directory.
            if not os.path.exists(config_path):
                config_path = os.path.join(config_dir, "../params.pkl")

        # Load the config from pickled.
        if os.path.exists(config_path):
            with open(config_path, "rb") as f:
                config = pickle.load(f)
        else:
            # If no config in given checkpoint -> Error.
            raise ValueError(
                "Could not find params.pkl in either the checkpoint dir or "
                "its parent directory!"
            )

        # no need for parallelism on evaluation
        config["num_workers"] = 0
        config["num_gpus"] = 0

        # create a dummy env since it's required but we only care about the policy
        tune.registry.register_env("DummyEnv", lambda *_: BaseEnv())
        config["env"] = "DummyEnv"

        # create the Trainer from config
        cls = get_trainable_cls(ALGORITHM)
        agent = cls(env=config["env"], config=config)
        # load state from checkpoint
        agent.restore(CHECKPOINT_PATH)
        # get policy for evaluation
        self.policy = agent.get_policy(POLICY_NAME)

    def act(self, observation: Dict[int, np.ndarray]) -> Dict[int, np.ndarray]:
        actions = {{}}
        for player_id in observation:
            # compute_single_action returns a tuple of (action, action_info, ...)
            # as we only need the action, we discard the other elements
            actions[player_id], *_ = self.policy.compute_single_action(
                observation[player_id]
            )
        return actions

"""


def getAnalysis(experiment: str):
    """Wrap the experiment directory ``experiment`` in an ``Analysis`` object.

    NOTE(review): ``Analysis`` is imported elsewhere in the file; presumably
    it is Ray Tune's analysis class - confirm against the imports.
    """
    experiment_analysis = Analysis(experiment)
    return experiment_analysis


def export(
    experiment_dir="/home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_player_custom_rewards",
    algorithm="PPO",
):
    """Export the most-trained checkpoint of an experiment as an agent package.

    Picks the trial with the highest ``training_iteration``, then that trial's
    checkpoint with the highest ``training_iteration``, generates the agent
    source for it and writes the package via ``export_agent``.

    Args:
        experiment_dir: Directory of the experiment results to analyse.
            Defaults to the path that used to be hard-coded here.
        algorithm: Trainable name embedded in the generated agent.

    Raises:
        ValueError: If no trial or no checkpoint is found.
    """
    # NOTE(review): leftover reference to another trial/checkpoint,
    # presumably from an earlier export:
    # PPO_Soccer_18d23_00000
    # /home/bruno/Workspace/soccer-tows-player/src/ray_results/Testing_env/PPO_Soccer_18d23_00000_0_2021-11-24_20-34-41/checkpoint_000500/checkpoint-500
    analysis = getAnalysis(experiment_dir)

    TRIAL = analysis.get_best_logdir("training_iteration", "max")
    if TRIAL is None:
        raise ValueError(f"No trial found in experiment {experiment_dir!r}")

    CHECKPOINT = analysis.get_best_checkpoint(
        TRIAL,
        "training_iteration",
        "max",
    )
    # Fail with a clear message here instead of a confusing attribute error
    # later, when the checkpoint path gets split.
    if CHECKPOINT is None:
        raise ValueError(f"No checkpoint found for trial {TRIAL!r}")

    print(TRIAL, CHECKPOINT)
    agent_file = get_agent_file_str(algorithm, CHECKPOINT)
    export_agent(agent_file, TRIAL)


# Run the export as soon as this code executes.
export()


this_path /home/bruno/Workspace/soccer-tows-player/src/experiments/ppo_multiagent
/home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_player_custom_rewards/PPO_Soccer_491c2_00000_0_2021-11-30_01-16-26 /home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_player_custom_rewards/PPO_Soccer_491c2_00000_0_2021-11-30_01-16-26/checkpoint_002617/checkpoint-2617
TRIALLL /home/bruno/Workspace/soccer-tows-player/src/ray_results/PPO_multiagent_player_custom_rewards/PPO_Soccer_491c2_00000_0_2021-11-30_01-16-26
