Feature/upgrade deps (#268)
* update gym/pettingzoo dependencies

* update gym and pettingzoo versions

* update unit tests

* remove custom gym env due to broken duplicate functionality

* update torch etc to latest

* address PettingZoo deprecation warnings

* update github workflow

* Update python-package.yml

* Update python-package.yml

* Update python-package.yml

* Update python-package.yml

* remove python 3.6

* support python versions 3.8 and 3.9

* make sure torch versions match

* linting

Co-authored-by: Nota, Christopher <cnota@irobot.com>
cpnota and Nota, Christopher committed Apr 12, 2022
1 parent 3804586 commit 7df8e05
Showing 16 changed files with 141 additions and 120 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/python-package.yml
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: [3.8, 3.9]

steps:
- uses: actions/checkout@v2
@@ -27,10 +27,8 @@ jobs:
run: |
sudo apt-get install swig
sudo apt-get install unrar
pip install torch==1.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip install torch~=1.11 --extra-index-url https://download.pytorch.org/whl/cpu
make install
AutoROM -v
python -m atari_py.import_roms $(python -c 'import site; print(site.getsitepackages()[0])')/multi_agent_ale_py/ROM
- name: Lint code
run: |
make lint
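
Installing the CPU build of torch before make install keeps pip from later resolving a different (CUDA) wheel, and the ~=1.11 specifier pins the minor version while allowing patch releases. A hedged sketch, not part of this commit, of a sanity check a CI step could run to confirm the resolved version:

    import torch

    # torch.__version__ looks like "1.11.0+cpu"; strip the local build tag first
    major, minor = (int(part) for part in torch.__version__.split("+")[0].split(".")[:2])
    assert (major, minor) >= (1, 11) and major < 2, torch.__version__
    print("torch", torch.__version__, "satisfies ~=1.11")
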
3 changes: 2 additions & 1 deletion Makefile
@@ -1,10 +1,11 @@
install:
pip install -e .[dev]
AutoROM -y --quiet

test: unit-test integration-test

unit-test:
python -m unittest discover -s all -p "*test.py"
python -m unittest discover -s all -p "*test.py" -t .

integration-test:
python -m unittest discover -s integration -p "*test.py"
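
The added -t . flag sets unittest's top-level directory to the repository root, so test modules under all/ are imported as package modules rather than as top-level files. A rough programmatic equivalent, as a sketch (not from the repo):

    import unittest

    # discover *test.py files under all/, importing them relative to the repo root
    loader = unittest.TestLoader()
    suite = loader.discover(start_dir="all", pattern="*test.py", top_level_dir=".")
    unittest.TextTestRunner().run(suite)
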
17 changes: 7 additions & 10 deletions all/environments/_multiagent_environment.py
@@ -81,20 +81,17 @@ def state(self):
def name(self):
'''str: The name of the environment.'''

@property
@abstractmethod
def state_spaces(self):
'''A dictionary of state spaces for each agent.'''
def state_space(self, agent_id):
'''The state space for the given agent.'''

@property
def observation_spaces(self):
'''Alias for MultiagentEnvironment.state_spaces.'''
return self.state_space
def observation_space(self, agent_id):
'''Alias for MultiagentEnvironment.state_space(agent_id).'''
return self.state_space(agent_id)

@property
@abstractmethod
def action_spaces(self):
'''A dictionary of action spaces for each agent.'''
def action_space(self, agent_id):
'''The action space for the given agent.'''

@property
@abstractmethod
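
For context, a hedged usage sketch of the new per-agent accessors (the import path and the agent ids are assumptions drawn from the Atari tests below):

    from all.environments import MultiagentAtariEnv  # assumed public export

    env = MultiagentAtariEnv('pong_v2', device='cpu')
    for agent_id in env.agents:
        # state_space/action_space are now methods keyed by agent id,
        # replacing the old state_spaces/action_spaces dictionaries
        print(agent_id, env.state_space(agent_id).shape, env.action_space(agent_id).n)
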
81 changes: 64 additions & 17 deletions all/environments/atari.py
@@ -1,44 +1,91 @@
import gym
from .gym import GymEnvironment
import torch
from all.core import State
from .duplicate_env import DuplicateEnvironment
from .atari_wrappers import (
NoopResetEnv,
MaxAndSkipEnv,
FireResetEnv,
WarpFrame,
LifeLostEnv,
)
from all.core import State
from .duplicate_env import DuplicateEnvironment
from ._environment import Environment


class AtariEnvironment(GymEnvironment):
def __init__(self, name, *args, **kwargs):
# need these for duplication
self._args = args
self._kwargs = kwargs
class AtariEnvironment(Environment):
def __init__(self, name, device='cpu'):

# construct the environment
env = gym.make(name + "NoFrameskip-v4")

# apply a subset of wrappers
env = NoopResetEnv(env, noop_max=30)
env = MaxAndSkipEnv(env)
if "FIRE" in env.unwrapped.get_action_meanings():
env = FireResetEnv(env)
env = WarpFrame(env)
env = LifeLostEnv(env)
# initialize
super().__init__(env, *args, **kwargs)

# initialize member variables
self._env = env
self._name = name
self._state = None
self._action = None
self._reward = None
self._done = True
self._info = None
self._device = device

def reset(self):
state = self._env.reset(), 0., False, None
self._state = State.from_gym(state, dtype=self._env.observation_space.dtype, device=self._device)
return self._state

def step(self, action):
self._state = State.from_gym(
self._env.step(self._convert(action)),
dtype=self._env.observation_space.dtype,
device=self._device
)
return self._state

def render(self, **kwargs):
return self._env.render(**kwargs)

def close(self):
return self._env.close()

def seed(self, seed):
self._env.seed(seed)

def duplicate(self, n):
return DuplicateEnvironment([AtariEnvironment(self._name, device=self._device) for _ in range(n)])

@property
def name(self):
return self._name

def reset(self):
state = self._env.reset(), 0., False, {'life_lost': False}
self._state = State.from_gym(state, dtype=self._env.observation_space.dtype, device=self._device)
@property
def state_space(self):
return self._env.observation_space

@property
def action_space(self):
return self._env.action_space

@property
def state(self):
return self._state

def duplicate(self, n):
return DuplicateEnvironment([
AtariEnvironment(self._name, *self._args, **self._kwargs) for _ in range(n)
])
@property
def env(self):
return self._env

@property
def device(self):
return self._device

def _convert(self, action):
if torch.is_tensor(action):
return action.item()
return action
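
A minimal interaction loop with the rewritten AtariEnvironment, as a hedged sketch (the 'Breakout' name and import path are assumptions; any ALE title registered as <Name>NoFrameskip-v4 should work):

    from all.environments import AtariEnvironment  # assumed public export

    env = AtariEnvironment('Breakout', device='cpu')
    state = env.reset()
    for _ in range(10):
        state = env.step(env.action_space.sample())  # random actions
    envs = env.duplicate(4)  # four fresh copies, rebuilt from the name only
    env.close()
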
16 changes: 5 additions & 11 deletions all/environments/gym.py
@@ -3,7 +3,6 @@
from all.core import State
from ._environment import Environment
from .duplicate_env import DuplicateEnvironment
import cloudpickle
gym.logger.set_level(40)


@@ -24,15 +23,10 @@ class GymEnvironment(Environment):
device (str, optional): the device on which tensors will be stored
'''

def __init__(self, env, device=torch.device('cpu'), name=None):
if isinstance(env, str):
self._name = env
env = gym.make(env)
else:
self._name = env.__class__.__name__
if name:
self._name = name
self._env = env
def __init__(self, id, device=torch.device('cpu'), name=None):
self._env = gym.make(id)
self._id = id
self._name = name if name else id
self._state = None
self._action = None
self._reward = None
@@ -67,7 +61,7 @@ def seed(self, seed):
self._env.seed(seed)

def duplicate(self, n):
return DuplicateEnvironment([GymEnvironment(cloudpickle.loads(cloudpickle.dumps(self._env)), device=self.device) for _ in range(n)])
return DuplicateEnvironment([GymEnvironment(self._id, device=self.device, name=self._name) for _ in range(n)])

@property
def state_space(self):
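
With the constructor now taking a registered environment id rather than a preconstructed env, duplicate() can simply rebuild from that id instead of cloudpickling the wrapped environment. A hedged usage sketch (import path assumed):

    from all.environments import GymEnvironment  # assumed public export

    env = GymEnvironment('CartPole-v0', device='cpu')
    state = env.reset()
    copies = env.duplicate(2)  # each copy is a fresh gym.make('CartPole-v0')
    print(env.name, state.observation.shape)  # name and observation shape
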
12 changes: 0 additions & 12 deletions all/environments/gym_test.py
@@ -8,10 +8,6 @@ def test_env_name(self):
env = GymEnvironment('CartPole-v0')
self.assertEqual(env.name, 'CartPole-v0')

def test_preconstructed_env_name(self):
env = GymEnvironment(gym.make('Blackjack-v0'))
self.assertEqual(env.name, 'BlackjackEnv')

def test_reset(self):
env = GymEnvironment('CartPole-v0')
state = env.reset()
@@ -20,14 +16,6 @@ def test_reset(self):
self.assertFalse(state.done)
self.assertEqual(state.mask, 1)

def test_reset_preconstructed_env(self):
env = GymEnvironment(gym.make('CartPole-v0'))
state = env.reset()
self.assertEqual(state.observation.shape, (4,))
self.assertEqual(state.reward, 0)
self.assertFalse(state.done)
self.assertEqual(state.mask, 1)

def test_step(self):
env = GymEnvironment('CartPole-v0')
env.reset()
12 changes: 6 additions & 6 deletions all/environments/multiagent_atari_test.py
@@ -49,14 +49,14 @@ def test_agent_iter(self):
self.assertEqual(next(it), 'first_0')

def test_state_spaces(self):
state_spaces = MultiagentAtariEnv('pong_v2', device='cpu').state_spaces
self.assertEqual(state_spaces['first_0'].shape, (1, 84, 84))
self.assertEqual(state_spaces['second_0'].shape, (1, 84, 84))
env = MultiagentAtariEnv('pong_v2', device='cpu')
self.assertEqual(env.state_space('first_0').shape, (1, 84, 84))
self.assertEqual(env.state_space('second_0').shape, (1, 84, 84))

def test_action_spaces(self):
action_spaces = MultiagentAtariEnv('pong_v2', device='cpu').action_spaces
self.assertEqual(action_spaces['first_0'].n, 18)
self.assertEqual(action_spaces['second_0'].n, 18)
env = MultiagentAtariEnv('pong_v2', device='cpu')
self.assertEqual(env.action_space('first_0').n, 18)
self.assertEqual(env.action_space('second_0').n, 18)

def test_list_agents(self):
env = MultiagentAtariEnv('pong_v2', device='cpu')
20 changes: 7 additions & 13 deletions all/environments/multiagent_pettingzoo.py
@@ -27,7 +27,7 @@ def __init__(self, zoo_env, name, device='cuda'):
self._device = device
self.agents = self._env.agents
self.subenvs = {
agent: SubEnv(agent, device, self.state_spaces[agent], self.action_spaces[agent])
agent: SubEnv(agent, device, self.state_space(agent), self.action_space(agent))
for agent in self.agents
}

@@ -79,7 +79,7 @@ def duplicate(self, n):

def last(self):
observation, reward, done, info = self._env.last()
selected_obs_space = self._env.observation_spaces[self._env.agent_selection]
selected_obs_space = self._env.observation_space(self._env.agent_selection)
return MultiagentState.from_zoo(self._env.agent_selection, (observation, reward, done, info), device=self._device, dtype=selected_obs_space.dtype)

@property
@@ -94,21 +94,15 @@ def device(self):
def agent_selection(self):
return self._env.agent_selection

@property
def state_spaces(self):
return self._env.observation_spaces

@property
def observation_spaces(self):
return self._env.observation_spaces
def state_space(self, agent_id):
return self._env.observation_space(agent_id)

@property
def action_spaces(self):
return self._env.action_spaces
def action_space(self, agent_id):
return self._env.action_space(agent_id)

def _convert(self, action):
agent = self._env.agent_selection
action_space = self._env.action_spaces[agent]
action_space = self.action_space(agent)
if torch.is_tensor(action):
if isinstance(action_space, gym.spaces.Discrete):
return action.item()
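
This mirrors the PettingZoo API change that deprecated the observation_spaces / action_spaces dictionary attributes in favour of per-agent methods. A hedged sketch of the new call pattern (setup mirrors the test below; import path assumed):

    from pettingzoo.mpe import simple_world_comm_v2
    from all.environments import MultiagentPettingZooEnv  # assumed public export

    env = MultiagentPettingZooEnv(simple_world_comm_v2.env(), name='simple_world_comm_v2', device='cpu')
    agent = env.agents[0]
    # deprecated: env.observation_spaces[agent] / env.action_spaces[agent]
    print(agent, env.state_space(agent), env.action_space(agent))
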
16 changes: 8 additions & 8 deletions all/environments/multiagent_pettingzoo_test.py
@@ -48,14 +48,14 @@ def test_agent_iter(self):
self.assertEqual(next(it), 'leadadversary_0')

def test_state_spaces(self):
state_spaces = self._make_env().state_spaces
self.assertEqual(state_spaces['leadadversary_0'].shape, (34,))
self.assertEqual(state_spaces['adversary_0'].shape, (34,))
env = self._make_env()
self.assertEqual(env.state_space('leadadversary_0').shape, (34,))
self.assertEqual(env.state_space('adversary_0').shape, (34,))

def test_action_spaces(self):
action_spaces = self._make_env().action_spaces
self.assertEqual(action_spaces['leadadversary_0'].n, 20)
self.assertEqual(action_spaces['adversary_0'].n, 5)
env = self._make_env()
self.assertEqual(env.action_space('leadadversary_0').n, 20)
self.assertEqual(env.action_space('adversary_0').n, 5)

def test_list_agents(self):
env = self._make_env()
@@ -83,8 +83,8 @@ def test_variable_spaces(self):
# tests that action spaces work
for agent in env.agents:
state = env.last()
self.assertTrue(env.observation_spaces[agent].contains(state['observation'].cpu().detach().numpy()))
env.step(env.action_spaces[env.agent_selection].sample())
self.assertTrue(env.observation_space(agent).contains(state['observation'].cpu().detach().numpy()))
env.step(env.action_space(env.agent_selection).sample())

def _make_env(self):
return MultiagentPettingZooEnv(simple_world_comm_v2.env(), name="simple_world_comm_v2", device='cpu')
3 changes: 1 addition & 2 deletions all/environments/vector_env.py
@@ -2,7 +2,6 @@
import torch
from all.core import StateArray
from ._vector_environment import VectorEnvironment
import cloudpickle
import numpy as np


@@ -13,7 +12,7 @@ class GymVectorEnvironment(VectorEnvironment):
This wrapper converts the output of the vector environment to PyTorch tensors,
and wraps them in a StateArray object that can be passed to a Parallel Agent.
This constructor accepts a preconstructed gym vetor environment. Note that
This constructor accepts a preconstructed gym vector environment. Note that
in the latter case, the name property is set to whatever the name
of the outermost wrapper on the environment is.
6 changes: 3 additions & 3 deletions all/experiments/experiment.py
@@ -1,7 +1,5 @@
from abc import ABC, abstractmethod
import numpy as np
from scipy import stats
import torch


class Experiment(ABC):
@@ -76,7 +74,9 @@ def _log_test_episode(self, episode, returns):

def _log_test(self, returns):
if not self._quiet:
print('test returns (mean ± sem): {} ± {}'.format(np.mean(returns), stats.sem(returns)))
mean = np.mean(returns)
sem = np.std(returns, ddof=1) / np.sqrt(len(returns))
print('test returns (mean ± sem): {} ± {}'.format(mean, sem))
self._writer.add_summary('returns-test', np.mean(returns), np.std(returns))

def save(self):
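
Dropping the scipy import means the standard error of the mean is now computed with numpy directly; with ddof=1 it reproduces what scipy.stats.sem previously reported. A small hedged check, not part of the commit:

    import numpy as np
    from scipy import stats  # only for comparison here

    returns = np.array([10., 12., 19.])
    sem = np.std(returns, ddof=1) / np.sqrt(len(returns))  # sample std over sqrt(n)
    assert np.isclose(sem, stats.sem(returns))
    print(np.mean(returns), sem)
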
6 changes: 3 additions & 3 deletions all/experiments/multiagent_env_experiment.py
@@ -1,8 +1,6 @@
from timeit import default_timer as timer
import numpy as np
from scipy import stats
from .writer import ExperimentWriter, CometWriter
from .experiment import Experiment


class MultiagentEnvExperiment():
@@ -166,7 +164,9 @@ def _log_test_episode(self, episode, returns):
def _log_test(self, returns):
for agent, agent_returns in returns.items():
if not self._quiet:
print('{} test returns (mean ± sem): {} ± {}'.format(agent, np.mean(agent_returns), stats.sem(agent_returns)))
mean = np.mean(agent_returns)
sem = np.std(agent_returns, ddof=1) / np.sqrt(len(agent_returns))
print('{} test returns (mean ± sem): {} ± {}'.format(agent, mean, sem))
self._writer.add_summary('{}/returns-test'.format(agent), np.mean(agent_returns), np.std(agent_returns))

def _save_model(self):
2 changes: 1 addition & 1 deletion all/experiments/parallel_env_experiment_test.py
Expand Up @@ -38,7 +38,7 @@ def test_writes_training_returns_eps(self):
)
np.testing.assert_equal(
self.experiment._writer.data["evaluation/returns/episode"]["values"],
np.array([10., 11., 17.]),
np.array([10., 12., 19.]),
)

def test_writes_test_returns(self):
