Commit fb28f66: Bugfix/state (#189)

* make GAE schedulable

* make cliprewards toggleable

* pass on type error

* run formatter

* more robust fix for life_lost
cpnota committed Dec 29, 2020
1 parent d2dc3ab commit fb28f66
Showing 7 changed files with 17 additions and 4 deletions.
5 changes: 3 additions & 2 deletions all/bodies/atari.py
@@ -5,9 +5,10 @@


 class DeepmindAtariBody(Body):
-    def __init__(self, agent, lazy_frames=False, episodic_lives=True, frame_stack=4):
+    def __init__(self, agent, lazy_frames=False, episodic_lives=True, frame_stack=4, clip_rewards=True):
         agent = FrameStack(agent, lazy=lazy_frames, size=frame_stack)
-        agent = ClipRewards(agent)
+        if clip_rewards:
+            agent = ClipRewards(agent)
         if episodic_lives:
             agent = EpisodicLives(agent)
         super().__init__(agent)
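
With clip_rewards exposed as a constructor flag, the DeepMind-style reward clipping can now be switched off, e.g. to evaluate on raw game scores. A minimal usage sketch; StubAgent is a stand-in so the snippet is self-contained and is not part of this commit:

from all.bodies import DeepmindAtariBody

class StubAgent:
    # Stand-in for any real agent; only here so the sketch runs.
    def act(self, state):
        return 0

# Default: behavior is unchanged, rewards are clipped as before.
body = DeepmindAtariBody(StubAgent())

# New in this commit: skip the ClipRewards wrapper entirely.
body = DeepmindAtariBody(StubAgent(), clip_rewards=False)
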
1 change: 1 addition & 0 deletions all/bodies/vision.py
@@ -28,6 +28,7 @@ class TensorDeviceCache:
     To efficiently implement device transfer of lazy states, this class
     caches the transferred tensor so that it is not copied multiple times.
     '''
+
     def __init__(self, max_size=16):
         self.max_size = max_size
         self.cache_data = []
2 changes: 2 additions & 0 deletions all/core/state.py
@@ -31,6 +31,7 @@ class State(dict):
         device (string):
             The torch device on which component tensors are stored.
     """
+
     def __init__(self, x, device='cpu', **kwargs):
         if not isinstance(x, dict):
             x = {'observation': x}
@@ -260,6 +261,7 @@ class StateArray(State):
         device (string):
             The torch device on which component tensors are stored.
     """
+
     def __init__(self, x, shape, device='cpu', **kwargs):
         if not isinstance(x, dict):
             x = {'observation': x}
6 changes: 6 additions & 0 deletions all/environments/atari.py
@@ -7,6 +7,7 @@
     WarpFrame,
     LifeLostEnv,
 )
+from all.core import State


 class AtariEnvironment(GymEnvironment):
@@ -31,6 +32,11 @@ def __init__(self, name, *args, **kwargs):
     def name(self):
         return self._name

+    def reset(self):
+        state = self._env.reset(), 0., False, {'life_lost': False}
+        self._state = State.from_gym(state, dtype=self._env.observation_space.dtype, device=self._device)
+        return self._state
+
     def duplicate(self, n):
         return [
             AtariEnvironment(self._name, *self._args, **self._kwargs) for _ in range(n)
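
The new reset override synthesizes a full gym-style (observation, reward, done, info) tuple with life_lost preset to False, so the flag exists on the very first state instead of appearing only after the first step. A short sketch of the intended effect, assuming State.from_gym merges info keys into the resulting state; the environment name and device are illustrative:

from all.environments import AtariEnvironment

env = AtariEnvironment('Breakout', device='cpu')
state = env.reset()
# Because reset() now passes {'life_lost': False} as the info dict,
# consumers such as EpisodicLives can read the flag immediately.
assert state['life_lost'] is False
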
3 changes: 2 additions & 1 deletion all/environments/gym.py
@@ -41,7 +41,8 @@ def name(self):
         return self._name

     def reset(self):
-        self._state = State.from_gym(self._env.reset(), dtype=self._env.observation_space.dtype, device=self._device)
+        state = self._env.reset(), 0., False, None
+        self._state = State.from_gym(state, dtype=self._env.observation_space.dtype, device=self._device)
         return self._state

     def step(self, action):
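
The base GymEnvironment.reset gets the matching treatment: instead of handing State.from_gym a bare observation, it wraps it as (observation, 0., False, None), so from_gym receives the same four-element shape from reset() and step() alike. The Atari subclass above differs only in supplying a real info dict in place of None.
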
3 changes: 2 additions & 1 deletion all/memory/generalized_advantage.py
@@ -1,8 +1,9 @@
 import torch
 from all.core import State
+from all.optim import Schedulable


-class GeneralizedAdvantageBuffer:
+class GeneralizedAdvantageBuffer(Schedulable):
     def __init__(
         self,
         v,
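
With Schedulable mixed in, the buffer's hyperparameters can be annealed over training by assigning schedule objects to them. A hypothetical sketch of the pattern; the toy class stands in for the buffer, and the LinearScheduler signature (initial value, final value, decay start, decay end) is an assumption based on how all.optim schedulers are used elsewhere in the library, not something this diff shows:

from all.optim import LinearScheduler, Schedulable

class ToyBuffer(Schedulable):
    # Stand-in with the same mix-in; GeneralizedAdvantageBuffer now
    # resolves scheduled attributes the same way.
    def __init__(self, lam):
        self.lam = lam

buffer = ToyBuffer(lam=0.95)
# Hypothetical: drift lambda from 0.95 toward 1.0 over 1e6 reads.
buffer.lam = LinearScheduler(0.95, 1.0, 0, 1_000_000, name='lam')
print(buffer.lam)  # near 0.95 early in training, approaching 1.0 later
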
1 change: 1 addition & 0 deletions all/memory/replay_buffer.py
@@ -151,6 +151,7 @@ def _sample_proportional(self, batch_size):

 class NStepReplayBuffer(ReplayBuffer):
     '''Converts any ReplayBuffer into an NStepReplayBuffer'''
+
     def __init__(
         self,
         steps,
