rm firezap, add nearby observation, add build principal

google-deepmind · Mar 13, 2024 · 567986c · 567986c
1 parent d6847dc
commit 567986c
Show file tree

Hide file tree

Showing 7 changed files with 81 additions and 21 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -40,7 +40,7 @@
   "[python]": {
     "editor.defaultFormatter": "ms-python.black-formatter",
     "editor.codeActionsOnSave": {
-      "source.organizeImports": true
+      "source.organizeImports": "explicit"
     }
   },
   "python.formatting.provider": "none",

diff --git a/meltingpot/configs/substrates/commons_harvest__open.py b/meltingpot/configs/substrates/commons_harvest__open.py
@@ -247,17 +247,16 @@
     ]
 }
 
-# Primitive action components.
+# Primitive action components. with zap removed
 # pylint: disable=bad-whitespace
 # pyformat: disable
-NOOP       = {"move": 0, "turn":  0, "fireZap": 0}
-FORWARD    = {"move": 1, "turn":  0, "fireZap": 0}
-STEP_RIGHT = {"move": 2, "turn":  0, "fireZap": 0}
-BACKWARD   = {"move": 3, "turn":  0, "fireZap": 0}
-STEP_LEFT  = {"move": 4, "turn":  0, "fireZap": 0}
-TURN_LEFT  = {"move": 0, "turn": -1, "fireZap": 0}
-TURN_RIGHT = {"move": 0, "turn":  1, "fireZap": 0}
-FIRE_ZAP   = {"move": 0, "turn":  0, "fireZap": 1}
+NOOP       = {"move": 0, "turn":  0}
+FORWARD    = {"move": 1, "turn":  0}
+STEP_RIGHT = {"move": 2, "turn":  0}
+BACKWARD   = {"move": 3, "turn":  0}
+STEP_LEFT  = {"move": 4, "turn":  0}
+TURN_LEFT  = {"move": 0, "turn": -1}
+TURN_RIGHT = {"move": 0, "turn":  1}
 # pyformat: enable
 # pylint: enable=bad-whitespace
 
@@ -269,7 +268,6 @@
     STEP_RIGHT,
     TURN_LEFT,
     TURN_RIGHT,
-    FIRE_ZAP,
 )
 
 TARGET_SPRITE_SELF = {
@@ -473,11 +471,10 @@ def create_avatar_object(player_idx: int,
                   "speed": 1.0,
                   "spawnGroup": spawn_group,
                   "postInitialSpawnGroup": "spawnPoints",
-                  "actionOrder": ["move", "turn", "fireZap"],
+                  "actionOrder": ["move", "turn"],
                   "actionSpec": {
                       "move": {"default": 0, "min": 0, "max": len(_COMPASS)},
                       "turn": {"default": 0, "min": -1, "max": 1},
-                      "fireZap": {"default": 0, "min": 0, "max": 1},
                   },
                   "view": {
                       "left": 5,
@@ -500,9 +497,6 @@ def create_avatar_object(player_idx: int,
                   "rewardForZapping": 0,
               }
           },
-          {
-              "component": "ReadyToShootObservation",
-          },
       ]
   }
   if _ENABLE_DEBUG_OBSERVATIONS:
@@ -540,7 +534,7 @@ def get_config():
   # Observation format configuration.
   config.individual_observation_names = [
       "RGB",
-      "READY_TO_SHOOT",
+      "NEARBY"
   ]
   config.global_observation_names = [
       "WORLD.RGB",
@@ -550,9 +544,9 @@ def get_config():
   config.action_spec = specs.action(len(ACTION_SET))
   config.timestep_spec = specs.timestep({
       "RGB": specs.OBSERVATION["RGB"],
-      "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"],
       # Debug only (do not use the following observations in policies).
       "WORLD.RGB": specs.rgb(144, 192),
+      ".NEARBY": specs.int32(10)
   })
 
   # The roles assigned to each player.

diff --git a/meltingpot/lua/modules/avatar_library.lua b/meltingpot/lua/modules/avatar_library.lua
@@ -274,6 +274,31 @@ function Avatar:addObservations(tileSet, world, observations)
       end
   }
   observations[#observations + 1] = spec
+
+  observations[#observations + 1] = {
+    name = stringId .. '.NEARBY',
+    type = 'tensor.Int32Tensor',
+    shape = {},
+    func = function(grid)
+        -- List of avatar ids
+        local resultsList = {}
+        local objectsOnLayer = self:queryPartialObservationWindow("upperPhysical")
+        for _, object in ipairs(objectsOnLayer) do
+        if object:hasComponent('Avatar') then
+            local index = object:getComponent('Avatar'):getIndex()
+            table.insert(resultsList, index)
+        end
+        end
+        -- Then reformat list as int32 tensor to output
+        local numPlayers = self.gameObject.simulation:getNumPlayers()
+        local resultTensor = tensor.Int32Tensor(numPlayers):fill(0)
+        for _, avatarId in ipairs(resultsList) do
+            resultTensor(avatarId):add(1)
+        end
+        return resultTensor
+    end
+}
+
 end
 
 function Avatar:reset()

diff --git a/meltingpot/scenario.py b/meltingpot/scenario.py
@@ -56,7 +56,8 @@ def _scenarios_by_substrate() -> Mapping[str, Collection[str]]:
     'STAMINA',
     'VOTING',
     # An extra observation that is never necessary but could perhaps help.
-    'COLLECTIVE_REWARD'
+    'COLLECTIVE_REWARD',
+    'NEARBY'
 })
 
 

diff --git a/meltingpot/substrate.py b/meltingpot/substrate.py
@@ -20,6 +20,9 @@
 from meltingpot.utils.substrates import substrate_factory
 from ml_collections import config_dict
 
+from SocialEnvDesign import principal_substrate
+from SocialEnvDesign.principal import Principal
+
 SUBSTRATES = substrate_configs.SUBSTRATES
 
 
@@ -59,6 +62,25 @@ def build_from_config(
   """
   return get_factory_from_config(config).build(roles)
 
+def build_principal_from_config(
+    config: config_dict.ConfigDict,
+    *,
+    roles: Sequence[str],
+    principal: Principal
+) -> principal_substrate.PrincipalSubstrate:
+  """Builds a substrate from the provided config.
+
+  Args:
+    config: config resulting from `get_config`.
+    roles: sequence of strings defining each player's role. The length of
+      this sequence determines the number of players.
+    principal: the principal
+
+  Returns:
+    The training substrate.
+  """
+  return get_factory_from_config(config).build_principal(roles, principal)
+
 
 def get_factory(name: str) -> substrate_factory.SubstrateFactory:
   """Returns the factory for the specified substrate."""

diff --git a/meltingpot/utils/substrates/specs.py b/meltingpot/utils/substrates/specs.py
@@ -34,8 +34,6 @@
     shape=(), dtype=np.float64, minimum=0, maximum=1, name='discount')
 REWARD = dm_env.specs.Array(shape=(), dtype=np.float64, name='reward')
 OBSERVATION = immutabledict.immutabledict({
-    'READY_TO_SHOOT': dm_env.specs.Array(
-        shape=(), dtype=np.float64, name='READY_TO_SHOOT'),
     'RGB': dm_env.specs.Array(shape=(88, 88, 3), dtype=np.uint8, name='RGB'),
     'POSITION': dm_env.specs.Array(shape=(2,), dtype=np.int32, name='POSITION'),
     'ORIENTATION': dm_env.specs.Array(

diff --git a/meltingpot/utils/substrates/substrate_factory.py b/meltingpot/utils/substrates/substrate_factory.py
@@ -20,6 +20,9 @@
 from meltingpot.utils.substrates import builder
 from meltingpot.utils.substrates import substrate
 
+from SocialEnvDesign import principal_substrate
+from SocialEnvDesign.principal import Principal
+
 
 class SubstrateFactory:
   """Factory for building specific substrates."""
@@ -93,3 +96,20 @@ def build(self, roles: Sequence[str]) -> substrate.Substrate:
         individual_observations=self._individual_observations,
         global_observations=self._global_observations,
         action_table=self._action_table)
+
+  def build_principal(self, roles: Sequence[str], principal: Principal) -> principal_substrate.PrincipalSubstrate:
+    """Builds the substrate.
+
+    Args:
+      roles: the role each player will take.
+
+    Returns:
+      The constructed substrate.
+    """
+    return principal_substrate.build_substrate(
+        lab2d_settings=self._lab2d_settings_builder(roles),
+        individual_observations=self._individual_observations,
+        global_observations=self._global_observations,
+        action_table=self._action_table,
+        principal=principal
+        )