From 89417d1ab9e375ace64f2095f8366eeabf513027 Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Fri, 15 Jun 2018 17:07:00 -0700
Subject: [PATCH 1/6] [Attempted fix]

---
 python/unitytrainers/ppo/trainer.py | 49 +++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py
index fa583e7e07..86eb5dc811 100755
--- a/python/unitytrainers/ppo/trainer.py
+++ b/python/unitytrainers/ppo/trainer.py
@@ -196,32 +196,59 @@ def take_action(self, all_brain_info: AllBrainInfo):
         else:
             return run_out[self.model.output], None, None, run_out

-    def generate_intrinsic_rewards(self, curr_info, next_info):
+    def generate_intrinsic_rewards(self, next_info):
         """
         Generates intrinsic reward used for Curiosity-based training.
-        :param curr_info: Current BrainInfo.
         :param next_info: Next BrainInfo.
         :return: Intrinsic rewards for all agents.
         """
         if self.use_curiosity:
-            feed_dict = {self.model.batch_size: len(curr_info.vector_observations), self.model.sequence_length: 1}
+            agent_index_to_ignore = []
+            for agent_index, agent_id in enumerate(next_info.agents):
+                if self.training_buffer[agent_id].last_brain_info is None:
+                    agent_index_to_ignore.append(agent_index)
+            feed_dict = {self.model.batch_size: len(next_info.vector_observations), self.model.sequence_length: 1}
             if self.is_continuous_action:
                 feed_dict[self.model.output] = next_info.previous_vector_actions
             else:
                 feed_dict[self.model.action_holder] = next_info.previous_vector_actions.flatten()
             if self.use_visual_obs:
-                for i in range(len(curr_info.visual_observations)):
-                    feed_dict[self.model.visual_in[i]] = curr_info.visual_observations[i]
+                for i in range(len(next_info.visual_observations)):
+                    tmp = []
+                    for agent_id in next_info.agents:
+                        agent_brain_info = self.training_buffer[agent_id].last_brain_info
+                        if agent_brain_info is None:
+                            agent_brain_info = next_info
+                        agent_obs = agent_brain_info.visual_observations[i][agent_brain_info.agents.index(agent_id)]
+                        tmp += [agent_obs]
+                    feed_dict[self.model.visual_in[i]] = np.array(tmp)
                     feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i]
             if self.use_vector_obs:
-                feed_dict[self.model.vector_in] = curr_info.vector_observations
+                tmp = []
+                for agent_id in next_info.agents:
+                    agent_brain_info = self.training_buffer[agent_id].last_brain_info
+                    if agent_brain_info is None:
+                        agent_brain_info = next_info
+                    agent_obs = agent_brain_info.vector_observations[agent_brain_info.agents.index(agent_id)]
+                    tmp += [agent_obs]
+                feed_dict[self.model.vector_in] = np.array(tmp)
                 feed_dict[self.model.next_vector_in] = next_info.vector_observations
             if self.use_recurrent:
-                if curr_info.memories.shape[1] == 0:
-                    curr_info.memories = np.zeros((len(curr_info.agents), self.m_size))
-                feed_dict[self.model.memory_in] = curr_info.memories
+                tmp = []
+                for agent_id in next_info.agents:
+                    agent_brain_info = self.training_buffer[agent_id].last_brain_info
+                    if agent_brain_info is None:
+                        agent_brain_info = next_info
+                    if agent_brain_info.memories.shape[1] == 0:
+                        agent_obs = np.zeros(self.m_size)
+                    else:
+                        agent_obs = agent_brain_info.memories[agent_brain_info.agents.index(agent_id)]
+                    tmp += [agent_obs]
+                feed_dict[self.model.memory_in] = np.array(tmp)
             intrinsic_rewards = self.sess.run(self.model.intrinsic_reward,
                                               feed_dict=feed_dict) * float(self.has_updated)
+            for index in agent_index_to_ignore:
+                intrinsic_rewards[index] = 0
             return intrinsic_rewards
         else:
             return None
@@ -259,12 +286,14 @@ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainIn
         curr_info = curr_all_info[self.brain_name]
         next_info = next_all_info[self.brain_name]

-        intrinsic_rewards = self.generate_intrinsic_rewards(curr_info, next_info)
+        # intrinsic_rewards = self.generate_intrinsic_rewards(curr_info, next_info)

         for agent_id in curr_info.agents:
             self.training_buffer[agent_id].last_brain_info = curr_info
             self.training_buffer[agent_id].last_take_action_outputs = take_action_outputs

+        intrinsic_rewards = self.generate_intrinsic_rewards(next_info)
+
         for agent_id in next_info.agents:
             stored_info = self.training_buffer[agent_id].last_brain_info
             stored_take_action_outputs = self.training_buffer[agent_id].last_take_action_outputs

From 582432f5a0dfd3ea2f49b2d6a468e306ddb55ed2 Mon Sep 17 00:00:00 2001
From: Arthur Juliani
Date: Thu, 28 Jun 2018 16:33:31 -0700
Subject: [PATCH 2/6] Use switch between old and new behavior

---
 python/unitytrainers/ppo/trainer.py | 95 +++++++++++++++++------------
 1 file changed, 55 insertions(+), 40 deletions(-)

diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py
index b59ad7a338..42fb4c8138 100755
--- a/python/unitytrainers/ppo/trainer.py
+++ b/python/unitytrainers/ppo/trainer.py
@@ -196,60 +196,77 @@ def take_action(self, all_brain_info: AllBrainInfo):
         else:
             return run_out[self.model.output], None, None, run_out

-    def generate_intrinsic_rewards(self, next_info):
+    def generate_intrinsic_rewards(self, curr_info, next_info):
         """
         Generates intrinsic reward used for Curiosity-based training.
         :param next_info: Next BrainInfo.
         :return: Intrinsic rewards for all agents.
         """
         if self.use_curiosity:
-            agent_index_to_ignore = []
-            for agent_index, agent_id in enumerate(next_info.agents):
-                if self.training_buffer[agent_id].last_brain_info is None:
-                    agent_index_to_ignore.append(agent_index)
             feed_dict = {self.model.batch_size: len(next_info.vector_observations), self.model.sequence_length: 1}
             if self.is_continuous_action:
                 feed_dict[self.model.output] = next_info.previous_vector_actions
             else:
                 feed_dict[self.model.action_holder] = next_info.previous_vector_actions.flatten()
-            if self.use_visual_obs:
-                for i in range(len(next_info.visual_observations)):
+
+            if curr_info.agents == next_info.agents:
+                if self.use_visual_obs:
+                    for i in range(len(curr_info.visual_observations)):
+                        feed_dict[self.model.visual_in[i]] = curr_info.visual_observations[i]
+                        feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i]
+                if self.use_vector_obs:
+                    feed_dict[self.model.vector_in] = curr_info.vector_observations
+                    feed_dict[self.model.next_vector_in] = next_info.vector_observations
+                if self.use_recurrent:
+                    if curr_info.memories.shape[1] == 0:
+                        curr_info.memories = np.zeros((len(curr_info.agents), self.m_size))
+                    feed_dict[self.model.memory_in] = curr_info.memories
+                intrinsic_rewards = self.sess.run(self.model.intrinsic_reward,
+                                                  feed_dict=feed_dict) * float(self.has_updated)
+                return intrinsic_rewards
+            else:
+                agent_index_to_ignore = []
+                for agent_index, agent_id in enumerate(next_info.agents):
+                    if self.training_buffer[agent_id].last_brain_info is None:
+                        agent_index_to_ignore.append(agent_index)
+                if self.use_visual_obs:
+                    for i in range(len(next_info.visual_observations)):
+                        tmp = []
+                        for agent_id in next_info.agents:
+                            agent_brain_info = self.training_buffer[agent_id].last_brain_info
+                            if agent_brain_info is None:
+                                agent_brain_info = next_info
+                            agent_obs = agent_brain_info.visual_observations[i][agent_brain_info.agents.index(agent_id)]
+                            tmp += [agent_obs]
+                        feed_dict[self.model.visual_in[i]] = np.array(tmp)
+                        feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i]
+                if self.use_vector_obs:
                     tmp = []
                     for agent_id in next_info.agents:
                         agent_brain_info = self.training_buffer[agent_id].last_brain_info
                         if agent_brain_info is None:
                             agent_brain_info = next_info
-                        agent_obs = agent_brain_info.visual_observations[i][agent_brain_info.agents.index(agent_id)]
+                        agent_obs = agent_brain_info.vector_observations[agent_brain_info.agents.index(agent_id)]
                         tmp += [agent_obs]
-                    feed_dict[self.model.visual_in[i]] = np.array(tmp)
-                    feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i]
-            if self.use_vector_obs:
-                tmp = []
-                for agent_id in next_info.agents:
-                    agent_brain_info = self.training_buffer[agent_id].last_brain_info
-                    if agent_brain_info is None:
-                        agent_brain_info = next_info
-                    agent_obs = agent_brain_info.vector_observations[agent_brain_info.agents.index(agent_id)]
-                    tmp += [agent_obs]
-                feed_dict[self.model.vector_in] = np.array(tmp)
-                feed_dict[self.model.next_vector_in] = next_info.vector_observations
-            if self.use_recurrent:
-                tmp = []
-                for agent_id in next_info.agents:
-                    agent_brain_info = self.training_buffer[agent_id].last_brain_info
-                    if agent_brain_info is None:
-                        agent_brain_info = next_info
-                    if agent_brain_info.memories.shape[1] == 0:
-                        agent_obs = np.zeros(self.m_size)
-                    else:
-                        agent_obs = agent_brain_info.memories[agent_brain_info.agents.index(agent_id)]
-                    tmp += [agent_obs]
-                feed_dict[self.model.memory_in] = np.array(tmp)
-            intrinsic_rewards = self.sess.run(self.model.intrinsic_reward,
-                                              feed_dict=feed_dict) * float(self.has_updated)
-            for index in agent_index_to_ignore:
-                intrinsic_rewards[index] = 0
-            return intrinsic_rewards
+                    feed_dict[self.model.vector_in] = np.array(tmp)
+                    feed_dict[self.model.next_vector_in] = next_info.vector_observations
+                if self.use_recurrent:
+                    tmp = []
+                    for agent_id in next_info.agents:
+                        agent_brain_info = self.training_buffer[agent_id].last_brain_info
+                        if agent_brain_info is None:
+                            agent_brain_info = next_info
+                        if agent_brain_info.memories.shape[1] == 0:
+                            agent_obs = np.zeros(self.m_size)
+                        else:
+                            agent_obs = agent_brain_info.memories[agent_brain_info.agents.index(agent_id)]
+                        tmp += [agent_obs]
+                    feed_dict[self.model.memory_in] = np.array(tmp)
+                intrinsic_rewards = self.sess.run(self.model.intrinsic_reward,
+                                                  feed_dict=feed_dict) * float(self.has_updated)
+                for index in agent_index_to_ignore:
+                    intrinsic_rewards[index] = 0
+                return intrinsic_rewards
         else:
             return None

@@ -286,13 +303,11 @@ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainIn
         curr_info = curr_all_info[self.brain_name]
         next_info = next_all_info[self.brain_name]

-        # intrinsic_rewards = self.generate_intrinsic_rewards(curr_info, next_info)
-
         for agent_id in curr_info.agents:
             self.training_buffer[agent_id].last_brain_info = curr_info
             self.training_buffer[agent_id].last_take_action_outputs = take_action_outputs

-        intrinsic_rewards = self.generate_intrinsic_rewards(next_info)
+        intrinsic_rewards = self.generate_intrinsic_rewards(curr_info, next_info)

         for agent_id in next_info.agents:
             stored_info = self.training_buffer[agent_id].last_brain_info

From 33328c329c2afcc7b33be59f0cd01045f8672f96 Mon Sep 17 00:00:00 2001
From: Arthur Juliani
Date: Thu, 28 Jun 2018 17:00:42 -0700
Subject: [PATCH 3/6] Simplify approach

---
 python/unityagents/brain.py         |  18 ++--
 python/unitytrainers/ppo/trainer.py | 120 ++++++++++++++--------------
 2 files changed, 69 insertions(+), 69 deletions(-)

diff --git a/python/unityagents/brain.py b/python/unityagents/brain.py
index 2188291018..d2b16d0fcb 100755
--- a/python/unityagents/brain.py
+++ b/python/unityagents/brain.py
@@ -3,8 +3,8 @@ class BrainInfo:
     def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
-                       reward=None, agents=None, local_done=None,
-                       vector_action=None, text_action=None, max_reached=None):
+                 reward=None, agents=None, local_done=None,
+                 vector_action=None, text_action=None, max_reached=None):
         """
         Describes experience at current step of all agents linked to a brain.
         """
@@ -49,10 +49,10 @@ def __str__(self):
         Vector Action space type: {5}
         Vector Action space size (per agent): {6}
         Vector Action descriptions: {7}'''.format(self.brain_name,
-                                                        str(self.number_visual_observations),
-                                                        self.vector_observation_space_type,
-                                                        str(self.vector_observation_space_size),
-                                                        str(self.num_stacked_vector_observations),
-                                                        self.vector_action_space_type,
-                                                        str(self.vector_action_space_size),
-                                                        ', '.join(self.vector_action_descriptions))
+                                                   str(self.number_visual_observations),
+                                                   self.vector_observation_space_type,
+                                                   str(self.vector_observation_space_size),
+                                                   str(self.num_stacked_vector_observations),
+                                                   self.vector_action_space_type,
+                                                   str(self.vector_action_space_size),
+                                                   ', '.join(self.vector_action_descriptions))
diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py
index 42fb4c8138..a6c291ba45 100755
--- a/python/unitytrainers/ppo/trainer.py
+++ b/python/unitytrainers/ppo/trainer.py
@@ -8,7 +8,7 @@
 import numpy as np
 import tensorflow as tf

-from unityagents import AllBrainInfo
+from unityagents import AllBrainInfo, BrainInfo
 from unitytrainers.buffer import Buffer
 from unitytrainers.ppo.models import PPOModel
 from unitytrainers.trainer import UnityTrainerException, Trainer
@@ -196,10 +196,51 @@ def take_action(self, all_brain_info: AllBrainInfo):
         else:
             return run_out[self.model.output], None, None, run_out

+    def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
+        """
+        Constructs a BrainInfo which contains the lost recent previous experiences for all agents info
+        which correspond to the agents in a provided next_info.
+        :BrainInfo next_info: A t+1 BrainInfo.
+        :return:
+        """
+        visual_observations = [[]]
+        vector_observations = []
+        text_observations = []
+        memories = []
+        rewards = []
+        local_dones = []
+        max_reacheds = []
+        agents = []
+        prev_vector_actions = []
+        prev_text_actions = []
+        for agent_id in next_info.agents:
+            agent_brain_info = self.training_buffer[agent_id].last_brain_info
+            if agent_brain_info is None:
+                agent_brain_info = next_info
+            for i in range(len(next_info.visual_observations)):
+                visual_observations[i].append(
+                    agent_brain_info.visual_observations[i][agent_brain_info.agents.index(agent_id)])
+            vector_observations.append(agent_brain_info.vector_observations[agent_brain_info.agents.index(agent_id)])
+            text_observations.append(agent_brain_info.text_observations[agent_brain_info.agents.index(agent_id)])
+            if self.use_recurrent:
+                memories.append(agent_brain_info.memories[agent_brain_info.agents.index(agent_id)])
+            rewards.append(agent_brain_info.rewards[agent_brain_info.agents.index(agent_id)])
+            local_dones.append(agent_brain_info.local_done[agent_brain_info.agents.index(agent_id)])
+            max_reacheds.append(agent_brain_info.max_reached[agent_brain_info.agents.index(agent_id)])
+            agents.append(agent_brain_info.agents[agent_brain_info.agents.index(agent_id)])
+            prev_vector_actions.append(
+                agent_brain_info.previous_vector_actions[agent_brain_info.agents.index(agent_id)])
+            prev_text_actions.append(agent_brain_info.previous_text_actions[agent_brain_info.agents.index(agent_id)])
+        curr_info = BrainInfo(visual_observations, vector_observations, text_observations, memories, rewards,
+                              agents,
+                              local_dones, prev_vector_actions, prev_text_actions, max_reacheds)
+        return curr_info
+
     def generate_intrinsic_rewards(self, curr_info, next_info):
         """
         Generates intrinsic reward used for Curiosity-based training.
-        :param next_info: Next BrainInfo.
+        :BrainInfo curr_info: Current BrainInfo.
+        :BrainInfo next_info: Next BrainInfo.
         :return: Intrinsic rewards for all agents.
""" if self.use_curiosity: @@ -209,64 +250,23 @@ def generate_intrinsic_rewards(self, curr_info, next_info): else: feed_dict[self.model.action_holder] = next_info.previous_vector_actions.flatten() - if curr_info.agents == next_info.agents: - if self.use_visual_obs: - for i in range(len(curr_info.visual_observations)): - feed_dict[self.model.visual_in[i]] = curr_info.visual_observations[i] - feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i] - if self.use_vector_obs: - feed_dict[self.model.vector_in] = curr_info.vector_observations - feed_dict[self.model.next_vector_in] = next_info.vector_observations - if self.use_recurrent: - if curr_info.memories.shape[1] == 0: - curr_info.memories = np.zeros((len(curr_info.agents), self.m_size)) - feed_dict[self.model.memory_in] = curr_info.memories - intrinsic_rewards = self.sess.run(self.model.intrinsic_reward, - feed_dict=feed_dict) * float(self.has_updated) - return intrinsic_rewards - else: - agent_index_to_ignore = [] - for agent_index, agent_id in enumerate(next_info.agents): - if self.training_buffer[agent_id].last_brain_info is None: - agent_index_to_ignore.append(agent_index) - if self.use_visual_obs: - for i in range(len(next_info.visual_observations)): - tmp = [] - for agent_id in next_info.agents: - agent_brain_info = self.training_buffer[agent_id].last_brain_info - if agent_brain_info is None: - agent_brain_info = next_info - agent_obs = agent_brain_info.visual_observations[i][agent_brain_info.agents.index(agent_id)] - tmp += [agent_obs] - feed_dict[self.model.visual_in[i]] = np.array(tmp) - feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i] - if self.use_vector_obs: - tmp = [] - for agent_id in next_info.agents: - agent_brain_info = self.training_buffer[agent_id].last_brain_info - if agent_brain_info is None: - agent_brain_info = next_info - agent_obs = agent_brain_info.vector_observations[agent_brain_info.agents.index(agent_id)] - tmp += [agent_obs] - feed_dict[self.model.vector_in] = np.array(tmp) - feed_dict[self.model.next_vector_in] = next_info.vector_observations - if self.use_recurrent: - tmp = [] - for agent_id in next_info.agents: - agent_brain_info = self.training_buffer[agent_id].last_brain_info - if agent_brain_info is None: - agent_brain_info = next_info - if agent_brain_info.memories.shape[1] == 0: - agent_obs = np.zeros(self.m_size) - else: - agent_obs = agent_brain_info.memories[agent_brain_info.agents.index(agent_id)] - tmp += [agent_obs] - feed_dict[self.model.memory_in] = np.array(tmp) - intrinsic_rewards = self.sess.run(self.model.intrinsic_reward, - feed_dict=feed_dict) * float(self.has_updated) - for index in agent_index_to_ignore: - intrinsic_rewards[index] = 0 - return intrinsic_rewards + if curr_info.agents != next_info.agents: + curr_info = self.construct_curr_info(next_info) + + if self.use_visual_obs: + for i in range(len(curr_info.visual_observations)): + feed_dict[self.model.visual_in[i]] = curr_info.visual_observations[i] + feed_dict[self.model.next_visual_in[i]] = next_info.visual_observations[i] + if self.use_vector_obs: + feed_dict[self.model.vector_in] = curr_info.vector_observations + feed_dict[self.model.next_vector_in] = next_info.vector_observations + if self.use_recurrent: + if curr_info.memories.shape[1] == 0: + curr_info.memories = np.zeros((len(curr_info.agents), self.m_size)) + feed_dict[self.model.memory_in] = curr_info.memories + intrinsic_rewards = self.sess.run(self.model.intrinsic_reward, + feed_dict=feed_dict) * float(self.has_updated) + 
return intrinsic_rewards else: return None From 89e6001984996c3b01dc4b079317a71e44a695a5 Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Thu, 28 Jun 2018 17:21:18 -0700 Subject: [PATCH 4/6] Code clean-up --- python/unitytrainers/ppo/trainer.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py index a6c291ba45..84c3d13c63 100755 --- a/python/unitytrainers/ppo/trainer.py +++ b/python/unitytrainers/ppo/trainer.py @@ -215,25 +215,23 @@ def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo: prev_text_actions = [] for agent_id in next_info.agents: agent_brain_info = self.training_buffer[agent_id].last_brain_info + agent_index = agent_brain_info.agents.index(agent_id) if agent_brain_info is None: agent_brain_info = next_info for i in range(len(next_info.visual_observations)): - visual_observations[i].append( - agent_brain_info.visual_observations[i][agent_brain_info.agents.index(agent_id)]) - vector_observations.append(agent_brain_info.vector_observations[agent_brain_info.agents.index(agent_id)]) - text_observations.append(agent_brain_info.text_observations[agent_brain_info.agents.index(agent_id)]) + visual_observations[i].append(agent_brain_info.visual_observations[i][agent_index]) + vector_observations.append(agent_brain_info.vector_observations[agent_index]) + text_observations.append(agent_brain_info.text_observations[agent_index]) if self.use_recurrent: - memories.append(agent_brain_info.memories[agent_brain_info.agents.index(agent_id)]) - rewards.append(agent_brain_info.rewards[agent_brain_info.agents.index(agent_id)]) - local_dones.append(agent_brain_info.local_done[agent_brain_info.agents.index(agent_id)]) - max_reacheds.append(agent_brain_info.max_reached[agent_brain_info.agents.index(agent_id)]) - agents.append(agent_brain_info.agents[agent_brain_info.agents.index(agent_id)]) - prev_vector_actions.append( - agent_brain_info.previous_vector_actions[agent_brain_info.agents.index(agent_id)]) - prev_text_actions.append(agent_brain_info.previous_text_actions[agent_brain_info.agents.index(agent_id)]) + memories.append(agent_brain_info.memories[agent_index]) + rewards.append(agent_brain_info.rewards[agent_index]) + local_dones.append(agent_brain_info.local_done[agent_index]) + max_reacheds.append(agent_brain_info.max_reached[agent_index]) + agents.append(agent_brain_info.agents[agent_index]) + prev_vector_actions.append(agent_brain_info.previous_vector_actions[agent_index]) + prev_text_actions.append(agent_brain_info.previous_text_actions[agent_index]) curr_info = BrainInfo(visual_observations, vector_observations, text_observations, memories, rewards, - agents, - local_dones, prev_vector_actions, prev_text_actions, max_reacheds) + agents, local_dones, prev_vector_actions, prev_text_actions, max_reacheds) return curr_info def generate_intrinsic_rewards(self, curr_info, next_info): From 06e4b76b2bf22f9be3f5f2879feb002cee3777fe Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Thu, 28 Jun 2018 17:23:38 -0700 Subject: [PATCH 5/6] Add to docstring --- python/unitytrainers/ppo/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py index 84c3d13c63..849ef00f9d 100755 --- a/python/unitytrainers/ppo/trainer.py +++ b/python/unitytrainers/ppo/trainer.py @@ -201,7 +201,7 @@ def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo: Constructs a BrainInfo which contains 
the lost recent previous experiences for all agents info which correspond to the agents in a provided next_info. :BrainInfo next_info: A t+1 BrainInfo. - :return: + :return: curr_info: Reconstructed BrainInfo to match agents of next_info. """ visual_observations = [[]] vector_observations = [] From f497a275de628ae17f7ab919ce8ccfac3a18e35d Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Thu, 28 Jun 2018 17:28:14 -0700 Subject: [PATCH 6/6] Fix typo --- python/unitytrainers/ppo/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py index 849ef00f9d..92c36d5e29 100755 --- a/python/unitytrainers/ppo/trainer.py +++ b/python/unitytrainers/ppo/trainer.py @@ -198,7 +198,7 @@ def take_action(self, all_brain_info: AllBrainInfo): def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo: """ - Constructs a BrainInfo which contains the lost recent previous experiences for all agents info + Constructs a BrainInfo which contains the most recent previous experiences for all agents info which correspond to the agents in a provided next_info. :BrainInfo next_info: A t+1 BrainInfo. :return: curr_info: Reconstructed BrainInfo to match agents of next_info.
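Taken together, the six patches converge on one idea: when the agents listed in next_info no longer match those in curr_info, rebuild a current BrainInfo by looking up each agent's most recently stored BrainInfo (falling back to next_info for agents that have nothing stored yet), so that every row fed to the curiosity model stays aligned per agent. Below is a minimal sketch of that reconstruction step in isolation; it is not the ML-Agents implementation. SimpleBrainInfo and the last_info dictionary are hypothetical stand-ins for BrainInfo and the trainer's training_buffer[agent_id].last_brain_info lookup, and only vector observations are handled.

# Minimal, self-contained sketch (not the ML-Agents code): rebuild a "current"
# info object whose rows line up with the agents listed in next_info.
# SimpleBrainInfo and last_info are hypothetical stand-ins.
import numpy as np


class SimpleBrainInfo:
    def __init__(self, vector_observations, agents):
        self.vector_observations = np.asarray(vector_observations)
        self.agents = list(agents)


def construct_curr_info(next_info, last_info):
    """Return a SimpleBrainInfo aligned, row for row, with next_info.agents."""
    vector_observations = []
    for agent_id in next_info.agents:
        prev = last_info.get(agent_id)
        if prev is None:
            # Agent appeared this step: fall back to its entry in next_info.
            # The None check must come before the .index() lookup.
            prev = next_info
        agent_index = prev.agents.index(agent_id)
        vector_observations.append(prev.vector_observations[agent_index])
    return SimpleBrainInfo(vector_observations, next_info.agents)


if __name__ == "__main__":
    # Agent 0 disappeared, agent 1 carries over, agent 2 is brand new.
    prev_step = SimpleBrainInfo([[0.0, 0.0], [1.0, 1.0]], agents=[0, 1])
    next_step = SimpleBrainInfo([[1.5, 1.5], [9.0, 9.0]], agents=[1, 2])
    last_info = {1: prev_step, 2: None}
    curr = construct_curr_info(next_step, last_info)
    print(curr.agents)               # [1, 2]
    print(curr.vector_observations)  # row 0 from prev_step, row 1 from next_step

With curr_info rebuilt this way, generate_intrinsic_rewards can pair each agent's previous and next observation even when agents spawn or terminate between steps; for a brand-new agent the fallback simply pairs next_info with itself, which is why the later patches no longer zero out intrinsic rewards by index as the first attempt did.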