Feature/upgrade deps (#268)
* update gym/pettingzoo dependencies

* update gym and pettingzoo versions

* update unit tests

* remove custom gym env due to broken duplicate functionality

* update torch etc to latest

* address PettingZoo deprecation warnings

* update github workflow

* Update python-package.yml

* Update python-package.yml

* Update python-package.yml

* Update python-package.yml

* remove python 3.6

* support python versions 3.8 and 3.9

* make sure torch versions match

* linting

Co-authored-by: Nota, Christopher <cnota@irobot.com>
cpnota and Nota, Christopher committed Apr 12, 2022
1 parent 3804586 commit 7df8e05
Showing 16 changed files with 141 additions and 120 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/python-package.yml
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: [3.8, 3.9]

steps:
- uses: actions/checkout@v2
@@ -27,10 +27,8 @@ jobs:
run: |
sudo apt-get install swig
sudo apt-get install unrar
pip install torch==1.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip install torch~=1.11 --extra-index-url https://download.pytorch.org/whl/cpu
make install
AutoROM -v
python -m atari_py.import_roms $(python -c 'import site; print(site.getsitepackages()[0])')/multi_agent_ale_py/ROM
- name: Lint code
run: |
make lint
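
Installing the CPU build of torch before make install keeps pip from later resolving a different (CUDA) wheel, and the ~=1.11 specifier pins the minor version while allowing patch releases. A hedged sketch, not part of this commit, of a sanity check a CI step could run to confirm the resolved version:

    import torch

    # torch.__version__ looks like "1.11.0+cpu"; strip the local build tag first
    major, minor = (int(part) for part in torch.__version__.split("+")[0].split(".")[:2])
    assert (major, minor) >= (1, 11) and major < 2, torch.__version__
    print("torch", torch.__version__, "satisfies ~=1.11")
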
3 changes: 2 additions & 1 deletion Makefile
@@ -1,10 +1,11 @@
install:
pip install -e .[dev]
AutoROM -y --quiet

test: unit-test integration-test

unit-test:
python -m unittest discover -s all -p "*test.py"
python -m unittest discover -s all -p "*test.py" -t .

integration-test:
python -m unittest discover -s integration -p "*test.py"
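
The added -t . flag sets unittest's top-level directory to the repository root, so test modules under all/ are imported as package modules rather than as top-level files. A rough programmatic equivalent, as a sketch (not from the repo):

    import unittest

    # discover *test.py files under all/, importing them relative to the repo root
    loader = unittest.TestLoader()
    suite = loader.discover(start_dir="all", pattern="*test.py", top_level_dir=".")
    unittest.TextTestRunner().run(suite)
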
17 changes: 7 additions & 10 deletions all/environments/_multiagent_environment.py
@@ -81,20 +81,17 @@ def state(self):
def name(self):
'''str: The name of the environment.'''

@property
@abstractmethod
def state_spaces(self):
'''A dictionary of state spaces for each agent.'''
def state_space(self, agent_id):
'''The state space for the given agent.'''

@property
def observation_spaces(self):
'''Alias for MultiagentEnvironment.state_spaces.'''
return self.state_space
def observation_space(self, agent_id):
'''Alias for MultiagentEnvironment.state_space(agent_id).'''
return self.state_space(agent_id)

@property
@abstractmethod
def action_spaces(self):
'''A dictionary of action spaces for each agent.'''
def action_space(self, agent_id):
'''The action space for the given agent.'''

@property
@abstractmethod
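
For context, a hedged usage sketch of the new per-agent accessors (the import path and the agent ids are assumptions drawn from the Atari tests below):

    from all.environments import MultiagentAtariEnv  # assumed public export

    env = MultiagentAtariEnv('pong_v2', device='cpu')
    for agent_id in env.agents:
        # state_space/action_space are now methods keyed by agent id,
        # replacing the old state_spaces/action_spaces dictionaries
        print(agent_id, env.state_space(agent_id).shape, env.action_space(agent_id).n)
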
81 changes: 64 additions & 17 deletions all/environments/atari.py
@@ -1,44 +1,91 @@
import gym
from .gym import GymEnvironment
import torch
from all.core import State
from .duplicate_env import DuplicateEnvironment
from .atari_wrappers import (
NoopResetEnv,
MaxAndSkipEnv,
FireResetEnv,
WarpFrame,
LifeLostEnv,
)
from all.core import State
from .duplicate_env import DuplicateEnvironment
from ._environment import Environment


class AtariEnvironment(GymEnvironment):
def __init__(self, name, *args, **kwargs):
# need these for duplication
self._args = args
self._kwargs = kwargs
class AtariEnvironment(Environment):
def __init__(self, name, device='cpu'):

# construct the environment
env = gym.make(name + "NoFrameskip-v4")

# apply a subset of wrappers
env = NoopResetEnv(env, noop_max=30)
env = MaxAndSkipEnv(env)
if "FIRE" in env.unwrapped.get_action_meanings():
env = FireResetEnv(env)
env = WarpFrame(env)
env = LifeLostEnv(env)
# initialize
super().__init__(env, *args, **kwargs)

# initialize member variables
self._env = env
self._name = name
self._state = None
self._action = None
self._reward = None
self._done = True
self._info = None
self._device = device

def reset(self):
state = self._env.reset(), 0., False, None
self._state = State.from_gym(state, dtype=self._env.observation_space.dtype, device=self._device)
return self._state

def step(self, action):
self._state = State.from_gym(
self._env.step(self._convert(action)),
dtype=self._env.observation_space.dtype,
device=self._device
)
return self._state

def render(self, **kwargs):
return self._env.render(**kwargs)

def close(self):
return self._env.close()

def seed(self, seed):
self._env.seed(seed)

def duplicate(self, n):
return DuplicateEnvironment([AtariEnvironment(self._name, device=self._device) for _ in range(n)])

@property
def name(self):
return self._name

def reset(self):
state = self._env.reset(), 0., False, {'life_lost': False}
self._state = State.from_gym(state, dtype=self._env.observation_space.dtype, device=self._device)
@property
def state_space(self):
return self._env.observation_space

@property
def action_space(self):
return self._env.action_space

@property
def state(self):
return self._state

def duplicate(self, n):
return DuplicateEnvironment([
AtariEnvironment(self._name, *self._args, **self._kwargs) for _ in range(n)
])
@property
def env(self):
return self._env

@property
def device(self):
return self._device

def _convert(self, action):
if torch.is_tensor(action):
return action.item()
return action
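
A minimal interaction loop with the rewritten AtariEnvironment, as a hedged sketch (the 'Breakout' name and import path are assumptions; any ALE title registered as <Name>NoFrameskip-v4 should work):

    from all.environments import AtariEnvironment  # assumed public export

    env = AtariEnvironment('Breakout', device='cpu')
    state = env.reset()
    for _ in range(10):
        state = env.step(env.action_space.sample())  # random actions
    envs = env.duplicate(4)  # four fresh copies, rebuilt from the name only
    env.close()
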
16 changes: 5 additions & 11 deletions all/environments/gym.py
@@ -3,7 +3,6 @@
from all.core import State
from ._environment import Environment
from .duplicate_env import DuplicateEnvironment
import cloudpickle
gym.logger.set_level(40)


@@ -24,15 +23,10 @@ class GymEnvironment(Environment):
device (str, optional): the device on which tensors will be stored
'''

def __init__(self, env, device=torch.device('cpu'), name=None):
if isinstance(env, str):
self._name = env
env = gym.make(env)
else:
self._name = env.__class__.__name__
if name:
self._name = name
self._env = env
def __init__(self, id, device=torch.device('cpu'), name=None):
self._env = gym.make(id)
self._id = id
self._name = name if name else id
self._state = None
self._action = None
self._reward = None
@@ -67,7 +61,7 @@ def seed(self, seed):
self._env.seed(seed)

def duplicate(self, n):
return DuplicateEnvironment([GymEnvironment(cloudpickle.loads(cloudpickle.dumps(self._env)), device=self.device) for _ in range(n)])
return DuplicateEnvironment([GymEnvironment(self._id, device=self.device, name=self._name) for _ in range(n)])

@property
def state_space(self):
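
With the constructor now taking a registered environment id rather than a preconstructed env, duplicate() can simply rebuild from that id instead of cloudpickling the wrapped environment. A hedged usage sketch (import path assumed):

    from all.environments import GymEnvironment  # assumed public export

    env = GymEnvironment('CartPole-v0', device='cpu')
    state = env.reset()
    copies = env.duplicate(2)  # each copy is a fresh gym.make('CartPole-v0')
    print(env.name, state.observation.shape)  # name and observation shape
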
12 changes: 0 additions & 12 deletions all/environments/gym_test.py
@@ -8,10 +8,6 @@ def test_env_name(self):
env = GymEnvironment('CartPole-v0')
self.assertEqual(env.name, 'CartPole-v0')

def test_preconstructed_env_name(self):
env = GymEnvironment(gym.make('Blackjack-v0'))
self.assertEqual(env.name, 'BlackjackEnv')

def test_reset(self):
env = GymEnvironment('CartPole-v0')
state = env.reset()
@@ -20,14 +16,6 @@ def test_reset(self):
self.assertFalse(state.done)
self.assertEqual(state.mask, 1)

def test_reset_preconstructed_env(self):
env = GymEnvironment(gym.make('CartPole-v0'))
state = env.reset()
self.assertEqual(state.observation.shape, (4,))
self.assertEqual(state.reward, 0)
self.assertFalse(state.done)
self.assertEqual(state.mask, 1)

def test_step(self):
env = GymEnvironment('CartPole-v0')
env.reset()
12 changes: 6 additions & 6 deletions all/environments/multiagent_atari_test.py
@@ -49,14 +49,14 @@ def test_agent_iter(self):
self.assertEqual(next(it), 'first_0')

def test_state_spaces(self):
state_spaces = MultiagentAtariEnv('pong_v2', device='cpu').state_spaces
self.assertEqual(state_spaces['first_0'].shape, (1, 84, 84))
self.assertEqual(state_spaces['second_0'].shape, (1, 84, 84))
env = MultiagentAtariEnv('pong_v2', device='cpu')
self.assertEqual(env.state_space('first_0').shape, (1, 84, 84))
self.assertEqual(env.state_space('second_0').shape, (1, 84, 84))

def test_action_spaces(self):
action_spaces = MultiagentAtariEnv('pong_v2', device='cpu').action_spaces
self.assertEqual(action_spaces['first_0'].n, 18)
self.assertEqual(action_spaces['second_0'].n, 18)
env = MultiagentAtariEnv('pong_v2', device='cpu')
self.assertEqual(env.action_space('first_0').n, 18)
self.assertEqual(env.action_space('second_0').n, 18)

def test_list_agents(self):
env = MultiagentAtariEnv('pong_v2', device='cpu')
20 changes: 7 additions & 13 deletions all/environments/multiagent_pettingzoo.py
@@ -27,7 +27,7 @@ def __init__(self, zoo_env, name, device='cuda'):
self._device = device
self.agents = self._env.agents
self.subenvs = {
agent: SubEnv(agent, device, self.state_spaces[agent], self.action_spaces[agent])
agent: SubEnv(agent, device, self.state_space(agent), self.action_space(agent))
for agent in self.agents
}

@@ -79,7 +79,7 @@ def duplicate(self, n):

def last(self):
observation, reward, done, info = self._env.last()
selected_obs_space = self._env.observation_spaces[self._env.agent_selection]
selected_obs_space = self._env.observation_space(self._env.agent_selection)
return MultiagentState.from_zoo(self._env.agent_selection, (observation, reward, done, info), device=self._device, dtype=selected_obs_space.dtype)

@property
@@ -94,21 +94,15 @@ def device(self):
def agent_selection(self):
return self._env.agent_selection

@property
def state_spaces(self):
return self._env.observation_spaces

@property
def observation_spaces(self):
return self._env.observation_spaces
def state_space(self, agent_id):
return self._env.observation_space(agent_id)

@property
def action_spaces(self):
return self._env.action_spaces
def action_space(self, agent_id):
return self._env.action_space(agent_id)

def _convert(self, action):
agent = self._env.agent_selection
action_space = self._env.action_spaces[agent]
action_space = self.action_space(agent)
if torch.is_tensor(action):
if isinstance(action_space, gym.spaces.Discrete):
return action.item()
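
This mirrors the PettingZoo API change that deprecated the observation_spaces / action_spaces dictionary attributes in favour of per-agent methods. A hedged sketch of the new call pattern (setup mirrors the test below; import path assumed):

    from pettingzoo.mpe import simple_world_comm_v2
    from all.environments import MultiagentPettingZooEnv  # assumed public export

    env = MultiagentPettingZooEnv(simple_world_comm_v2.env(), name='simple_world_comm_v2', device='cpu')
    agent = env.agents[0]
    # deprecated: env.observation_spaces[agent] / env.action_spaces[agent]
    print(agent, env.state_space(agent), env.action_space(agent))
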
16 changes: 8 additions & 8 deletions all/environments/multiagent_pettingzoo_test.py
@@ -48,14 +48,14 @@ def test_agent_iter(self):
self.assertEqual(next(it), 'leadadversary_0')

def test_state_spaces(self):
state_spaces = self._make_env().state_spaces
self.assertEqual(state_spaces['leadadversary_0'].shape, (34,))
self.assertEqual(state_spaces['adversary_0'].shape, (34,))
env = self._make_env()
self.assertEqual(env.state_space('leadadversary_0').shape, (34,))
self.assertEqual(env.state_space('adversary_0').shape, (34,))

def test_action_spaces(self):
action_spaces = self._make_env().action_spaces
self.assertEqual(action_spaces['leadadversary_0'].n, 20)
self.assertEqual(action_spaces['adversary_0'].n, 5)
env = self._make_env()
self.assertEqual(env.action_space('leadadversary_0').n, 20)
self.assertEqual(env.action_space('adversary_0').n, 5)

def test_list_agents(self):
env = self._make_env()
@@ -83,8 +83,8 @@ def test_variable_spaces(self):
# tests that action spaces work
for agent in env.agents:
state = env.last()
self.assertTrue(env.observation_spaces[agent].contains(state['observation'].cpu().detach().numpy()))
env.step(env.action_spaces[env.agent_selection].sample())
self.assertTrue(env.observation_space(agent).contains(state['observation'].cpu().detach().numpy()))
env.step(env.action_space(env.agent_selection).sample())

def _make_env(self):
return MultiagentPettingZooEnv(simple_world_comm_v2.env(), name="simple_world_comm_v2", device='cpu')
3 changes: 1 addition & 2 deletions all/environments/vector_env.py
@@ -2,7 +2,6 @@
import torch
from all.core import StateArray
from ._vector_environment import VectorEnvironment
import cloudpickle
import numpy as np


@@ -13,7 +12,7 @@ class GymVectorEnvironment(VectorEnvironment):
This wrapper converts the output of the vector environment to PyTorch tensors,
and wraps them in a StateArray object that can be passed to a Parallel Agent.
This constructor accepts a preconstructed gym vetor environment. Note that
This constructor accepts a preconstructed gym vector environment. Note that
in the latter case, the name property is set to whatever the name
of the outermost wrapper on the environment is.
6 changes: 3 additions & 3 deletions all/experiments/experiment.py
@@ -1,7 +1,5 @@
from abc import ABC, abstractmethod
import numpy as np
from scipy import stats
import torch


class Experiment(ABC):
@@ -76,7 +74,9 @@ def _log_test_episode(self, episode, returns):

def _log_test(self, returns):
if not self._quiet:
print('test returns (mean ± sem): {} ± {}'.format(np.mean(returns), stats.sem(returns)))
mean = np.mean(returns)
sem = np.std(returns, ddof=1) / np.sqrt(len(returns))
print('test returns (mean ± sem): {} ± {}'.format(mean, sem))
self._writer.add_summary('returns-test', np.mean(returns), np.std(returns))

def save(self):
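
Dropping the scipy import means the standard error of the mean is now computed with numpy directly; with ddof=1 it reproduces what scipy.stats.sem previously reported. A small hedged check, not part of the commit:

    import numpy as np
    from scipy import stats  # only for comparison here

    returns = np.array([10., 12., 19.])
    sem = np.std(returns, ddof=1) / np.sqrt(len(returns))  # sample std over sqrt(n)
    assert np.isclose(sem, stats.sem(returns))
    print(np.mean(returns), sem)
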
6 changes: 3 additions & 3 deletions all/experiments/multiagent_env_experiment.py
@@ -1,8 +1,6 @@
from timeit import default_timer as timer
import numpy as np
from scipy import stats
from .writer import ExperimentWriter, CometWriter
from .experiment import Experiment


class MultiagentEnvExperiment():
@@ -166,7 +164,9 @@ def _log_test_episode(self, episode, returns):
def _log_test(self, returns):
for agent, agent_returns in returns.items():
if not self._quiet:
print('{} test returns (mean ± sem): {} ± {}'.format(agent, np.mean(agent_returns), stats.sem(agent_returns)))
mean = np.mean(agent_returns)
sem = np.std(agent_returns, ddof=1) / np.sqrt(len(agent_returns))
print('{} test returns (mean ± sem): {} ± {}'.format(agent, mean, sem))
self._writer.add_summary('{}/returns-test'.format(agent), np.mean(agent_returns), np.std(agent_returns))

def _save_model(self):
2 changes: 1 addition & 1 deletion all/experiments/parallel_env_experiment_test.py
Expand Up @@ -38,7 +38,7 @@ def test_writes_training_returns_eps(self):
)
np.testing.assert_equal(
self.experiment._writer.data["evaluation/returns/episode"]["values"],
np.array([10., 11., 17.]),
np.array([10., 12., 19.]),
)

def test_writes_test_returns(self):
