Merge pull request #248 from cpnota/release/0.7.1
Release/0.7.1
cpnota committed Jun 14, 2021
2 parents 67b27aa + 074d0ca commit 01836e0
Showing 46 changed files with 707 additions and 159 deletions.
1 change: 1 addition & 0 deletions .github/workflows/python-package.yml
@@ -30,6 +30,7 @@ jobs:
pip install torch==1.8.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
make install
AutoROM -v
+python -m atari_py.import_roms $(python -c 'import site; print(site.getsitepackages()[0])')/multi_agent_ale_py/ROM
- name: Lint code
run: |
make lint
2 changes: 1 addition & 1 deletion all/agents/a2c.py
@@ -101,7 +101,7 @@ def _make_buffer(self):
)


-class A2CTestAgent(Agent):
+class A2CTestAgent(Agent, ParallelAgent):
    def __init__(self, features, policy):
        self.features = features
        self.policy = policy
10 changes: 3 additions & 7 deletions all/agents/dqn.py
@@ -81,12 +81,8 @@ def _should_train(self):


class DQNTestAgent(Agent):
-    def __init__(self, q, n_actions, exploration=0.):
-        self.q = q
-        self.n_actions = n_actions
-        self.exploration = 0.001
+    def __init__(self, policy):
+        self.policy = policy

    def act(self, state):
-        if np.random.rand() < self.exploration:
-            return np.random.randint(0, self.n_actions)
-        return torch.argmax(self.q.eval(state)).item()
+        return self.policy.eval(state)
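The rewritten test agent delegates action selection entirely to a policy object exposing an eval(state) method. A minimal sketch of such a policy, assuming a q-network whose eval(state) returns a tensor of action values (the wrapper class and its name are illustrative, not part of this commit):

import torch


class GreedyEvalPolicy:
    """Illustrative policy: act greedily with respect to a q-network at evaluation time."""

    def __init__(self, q):
        self.q = q  # any module whose eval(state) returns a tensor of action values

    def eval(self, state):
        # no exploration at test time: pick the highest-valued action
        return torch.argmax(self.q.eval(state)).item()


# usage sketch: agent = DQNTestAgent(GreedyEvalPolicy(q))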
2 changes: 1 addition & 1 deletion all/agents/sac.py
@@ -98,7 +98,7 @@ def _train(self):

        # adjust temperature
        temperature_grad = (_log_probs + self.entropy_target).mean()
-        self.temperature += self.lr_temperature * temperature_grad.detach()
+        self.temperature = max(0, self.temperature + self.lr_temperature * temperature_grad.detach())

        # additional debugging info
        self.writer.add_loss('entropy', -_log_probs.mean())
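The behavioral change here is the clamp: if a gradient step would push the entropy temperature below zero, it is now pinned at zero instead of going negative. A minimal numeric illustration of the update rule (all values are made up):

lr_temperature = 0.01
temperature = 0.005
temperature_grad = -1.2  # hypothetical detached gradient value

# old rule: temperature += lr_temperature * temperature_grad  ->  -0.007 (negative)
# new rule clamps at zero:
temperature = max(0, temperature + lr_temperature * temperature_grad)
print(temperature)  # 0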
7 changes: 6 additions & 1 deletion all/agents/vqn.py
@@ -50,4 +50,9 @@ def _train(self, reward, next_state):
        self.q.reinforce(loss)


-VQNTestAgent = DQNTestAgent
+class VQNTestAgent(Agent, ParallelAgent):
+    def __init__(self, policy):
+        self.policy = policy
+
+    def act(self, state):
+        return self.policy.eval(state)
5 changes: 2 additions & 3 deletions all/agents/vsarsa.py
@@ -1,7 +1,6 @@
from torch.nn.functional import mse_loss
from ._agent import Agent
from ._parallel_agent import ParallelAgent
-from .dqn import DQNTestAgent
+from .vqn import VQNTestAgent


class VSarsa(ParallelAgent):
@@ -47,4 +46,4 @@ def _train(self, reward, next_state, next_action):
        self.q.reinforce(loss)


-VSarsaTestAgent = DQNTestAgent
+VSarsaTestAgent = VQNTestAgent
5 changes: 3 additions & 2 deletions all/bodies/atari.py
@@ -6,7 +6,8 @@

class DeepmindAtariBody(Body):
    def __init__(self, agent, lazy_frames=False, episodic_lives=True, frame_stack=4, clip_rewards=True):
-        agent = FrameStack(agent, lazy=lazy_frames, size=frame_stack)
+        if frame_stack > 1:
+            agent = FrameStack(agent, lazy=lazy_frames, size=frame_stack)
        if clip_rewards:
            agent = ClipRewards(agent)
        if episodic_lives:
@@ -19,7 +20,7 @@ def process_state(self, state):
        if 'life_lost' not in state:
            return state

-        if len(state) == 1:
+        if len(state.shape) == 0:
            if state['life_lost']:
                return state.update('mask', 0.)
            return state
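The new check distinguishes a single State (shape ()) from a StateArray (shape (num_envs,)) by the shape of the container rather than its length. A rough sketch of the distinction, assuming the State/StateArray behavior implied by the checks in this diff:

import torch
from all.core import State

single = State({'observation': torch.zeros(4)})  # one environment's state
batch = State.array([single, single])            # a StateArray stacking two states

print(len(single.shape))  # expected 0 -> handled by the per-state branch above
print(len(batch.shape))   # expected 1 -> falls through to the StateArray handling below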
5 changes: 2 additions & 3 deletions all/bodies/vision.py
@@ -69,10 +69,9 @@ def update(self, key, value):
        x = {}
        for k in self.keys():
            if not k == key:
-                x[k] = super().__getitem__(k)
+                x[k] = dict.__getitem__(self, k)
        x[key] = value
-        state = LazyState(x, device=self.device)
-        state.to_cache = self.to_cache
+        state = LazyState.from_state(x, x['observation'], self.to_cache)
        return state

    def to(self, device):
7 changes: 6 additions & 1 deletion all/environments/__init__.py
@@ -1,9 +1,12 @@
from ._environment import Environment
-from._multiagent_environment import MultiagentEnvironment
+from ._multiagent_environment import MultiagentEnvironment
+from ._vector_environment import VectorEnvironment
from .gym import GymEnvironment
from .atari import AtariEnvironment
from .multiagent_atari import MultiagentAtariEnv
from .multiagent_pettingzoo import MultiagentPettingZooEnv
+from .duplicate_env import DuplicateEnvironment
+from .vector_env import GymVectorEnvironment
from .pybullet import PybulletEnvironment

__all__ = [
@@ -13,5 +16,7 @@
"AtariEnvironment",
"MultiagentAtariEnv",
"MultiagentPettingZooEnv",
"GymVectorEnvironment",
"DuplicateEnvironment",
"PybulletEnvironment",
]
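With these exports in place, the new vector-environment types are importable from the package root. A small sketch (GymVectorEnvironment's constructor is not shown in this diff, so only the imports are illustrated):

from all.environments import (
    DuplicateEnvironment,
    GymVectorEnvironment,
    VectorEnvironment,
)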
116 changes: 116 additions & 0 deletions all/environments/_vector_environment.py
@@ -0,0 +1,116 @@
from abc import ABC, abstractmethod


class VectorEnvironment(ABC):
    """
    A reinforcement learning vector environment.

    Similar to a regular RL environment, except that many environments are stacked
    together in the observations, rewards, and dones, and step() expects one action
    per sub-environment. Because sub-environments finish at different times, there is
    no need to reset them manually: the vector environment automatically resets each
    sub-environment when it is done.
    """

    @property
    @abstractmethod
    def name(self):
        """
        The name of the environment.
        """

    @abstractmethod
    def reset(self):
        """
        Reset the environment and return a new initial state.
        Returns
        -------
        State
            The initial state for the next episode.
        """

    @abstractmethod
    def step(self, action):
        """
        Apply an action and get the next state.
        Parameters
        ----------
        action : Action
            The action to apply at the current time step.
        Returns
        -------
        all.environments.State
            The State of the environment after the action is applied.
            This State object includes both the done flag and any additional "info"
        float
            The reward achieved by the previous action
        """

    @abstractmethod
    def close(self):
        """
        Clean up any extraneous environment objects.
        """

    @property
    @abstractmethod
    def state_array(self):
        """
        A StateArray of the Environments at the current timestep.
        """

    @property
    @abstractmethod
    def state_space(self):
        """
        The Space representing the range of observable states for each environment.
        Returns
        -------
        Space
            An object of type Space that represents possible states the agent may observe
        """

    @property
    def observation_space(self):
        """
        Alias for Environment.state_space.
        Returns
        -------
        Space
            An object of type Space that represents possible states the agent may observe
        """
        return self.state_space

    @property
    @abstractmethod
    def action_space(self):
        """
        The Space representing the range of possible actions for each environment.
        Returns
        -------
        Space
            An object of type Space that represents possible actions the agent may take
        """

    @property
    @abstractmethod
    def device(self):
        """
        The torch device the environment lives on.
        """

    @property
    @abstractmethod
    def num_envs(self):
        """
        Number of environments in the vector. This is the number of actions step() expects
        as input, and the number of observations, dones, etc. returned by the environment.
        """
5 changes: 3 additions & 2 deletions all/environments/atari.py
@@ -8,6 +8,7 @@
    LifeLostEnv,
)
from all.core import State
+from .duplicate_env import DuplicateEnvironment


class AtariEnvironment(GymEnvironment):
@@ -38,6 +39,6 @@ def reset(self):
        return self._state

    def duplicate(self, n):
-        return [
+        return DuplicateEnvironment([
            AtariEnvironment(self._name, *self._args, **self._kwargs) for _ in range(n)
-        ]
+        ])
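Duplicating an Atari environment therefore now yields a ready-to-use vector environment instead of a bare list, e.g. (the game name is illustrative):

from all.environments import AtariEnvironment

venv = AtariEnvironment('Breakout').duplicate(4)  # a DuplicateEnvironment wrapping 4 copies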
73 changes: 73 additions & 0 deletions all/environments/duplicate_env.py
@@ -0,0 +1,73 @@
import gym
import torch
from all.core import State
from ._vector_environment import VectorEnvironment
import numpy as np


class DuplicateEnvironment(VectorEnvironment):
    '''
    Turns a list of ALL Environment objects into a VectorEnvironment object.

    This wrapper takes the list of States generated by the environments and outputs
    a StateArray object containing all of the environment states. Like all vector
    environments, the sub-environments are automatically reset when done.

    Args:
        envs: A list of ALL environments
        device (optional): the device on which tensors will be stored
    '''

    def __init__(self, envs, device=torch.device('cpu')):
        self._name = envs[0].name
        self._envs = envs
        self._state = None
        self._action = None
        self._reward = None
        self._done = True
        self._info = None
        self._device = device

    @property
    def name(self):
        return self._name

    def reset(self):
        self._state = State.array([sub_env.reset() for sub_env in self._envs])
        return self._state

    def step(self, actions):
        states = []
        actions = actions.cpu().detach().numpy()
        for sub_env, action in zip(self._envs, actions):
            state = sub_env.reset() if sub_env.state.done else sub_env.step(action)
            states.append(state)
        self._state = State.array(states)
        return self._state

    def close(self):
        # close every wrapped sub-environment
        for sub_env in self._envs:
            sub_env.close()

    def seed(self, seed):
        for i, env in enumerate(self._envs):
            env.seed(seed + i)

    @property
    def state_space(self):
        return self._envs[0].observation_space

    @property
    def action_space(self):
        return self._envs[0].action_space

    @property
    def state_array(self):
        return self._state

    @property
    def device(self):
        return self._device

    @property
    def num_envs(self):
        return len(self._envs)
54 changes: 54 additions & 0 deletions all/environments/duplicate_env_test.py
@@ -0,0 +1,54 @@
import unittest
import gym
import torch
from all.environments import DuplicateEnvironment, GymEnvironment


def make_vec_env(num_envs=3):
    env = [GymEnvironment('CartPole-v0') for i in range(num_envs)]
    return env


class DuplicateEnvironmentTest(unittest.TestCase):
    def test_env_name(self):
        env = DuplicateEnvironment(make_vec_env())
        self.assertEqual(env.name, 'CartPole-v0')

    def test_num_envs(self):
        num_envs = 5
        env = DuplicateEnvironment(make_vec_env(num_envs))
        self.assertEqual(env.num_envs, num_envs)
        self.assertEqual((num_envs,), env.reset().shape)

    def test_reset(self):
        num_envs = 5
        env = DuplicateEnvironment(make_vec_env(num_envs))
        state = env.reset()
        self.assertEqual(state.observation.shape, (num_envs, 4))
        self.assertTrue((state.reward == torch.zeros(num_envs, )).all())
        self.assertTrue((state.done == torch.zeros(num_envs, )).all())
        self.assertTrue((state.mask == torch.ones(num_envs, )).all())

    def test_step(self):
        num_envs = 5
        env = DuplicateEnvironment(make_vec_env(num_envs))
        env.reset()
        state = env.step(torch.ones(num_envs, dtype=torch.int32))
        self.assertEqual(state.observation.shape, (num_envs, 4))
        self.assertTrue((state.reward == torch.ones(num_envs, )).all())
        self.assertTrue((state.done == torch.zeros(num_envs, )).all())
        self.assertTrue((state.mask == torch.ones(num_envs, )).all())

    def test_step_until_done(self):
        num_envs = 3
        env = DuplicateEnvironment(make_vec_env(num_envs))
        env.seed(5)
        env.reset()
        for _ in range(100):
            state = env.step(torch.ones(num_envs, dtype=torch.int32))
            if state.done[0]:
                break
        self.assertEqual(state[0].observation.shape, (4,))
        self.assertEqual(state[0].reward, 1.)
        self.assertTrue(state[0].done)
        self.assertEqual(state[0].mask, 0)
