Commit
* add initial multiagent_atari implementation
* add multiagent atari initial code
* improve package structure
* add multiagent atari
* start updating to new preset method
* add independent
* render and replay buffer size
* add watch script
* remove starter code for parameter sharing dqn
* update multiagent atari env unittest
* update tests and doc for MultiagentAtari environment
* add abstract multiagent environment
* add multiagent env documentation
* update documentation and make MultiagentAtari implement abstract methods
* add multiagent env test
* add test mode
* add ma-atari to extras
* upgrade gym version
* add autorom
* add integration tests
* run formatter
* install unrar on travis
* fix mock writer
* make unit test not write preset to disk
* add multiagent atari preset unittest
* formatting

Co-authored-by: Ben Black <weepingwillowben@gmail.com>
1 parent: 7a8860d. Commit: 97e64d5. Showing 29 changed files with 1,044 additions and 22 deletions.
all/__init__.py:

@@ -1,4 +1,26 @@
+import all.agents
+import all.approximation
+import all.core
+import all.environments
+import all.logging
+import all.memory
 import all.nn
+import all.optim
+import all.policies
+import all.presets
 from all.core import State, StateArray

-__all__ = ['nn', 'State', 'StateArray']
+__all__ = [
+    'agents',
+    'approximation',
+    'core',
+    'environments',
+    'logging',
+    'memory',
+    'nn',
+    'optim',
+    'policies',
+    'presets',
+    'State',
+    'StateArray'
+]
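The practical effect of the expanded `__init__` is that every subpackage becomes reachable through the top-level `all` package. A small illustrative sketch of what the new layout allows (not part of the commit itself):

    import all

    # Subpackages are now attributes of the top-level package:
    assert hasattr(all, 'agents')
    assert hasattr(all, 'environments')
    assert 'presets' in all.__all__

    # State and StateArray are still re-exported from the package root:
    from all import State, StateArray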
Multiagent agents subpackage, __init__.py (new file):

@@ -0,0 +1,7 @@
+from ._multiagent import Multiagent
+from .independent import IndependentMultiagent
+
+__all__ = [
+    "Multiagent",
+    "IndependentMultiagent"
+]
_multiagent.py (new file):

@@ -0,0 +1,34 @@
+from abc import ABC, abstractmethod
+from all.optim import Schedulable
+
+
+class Multiagent(ABC, Schedulable):
+    """
+    A multiagent reinforcement learning agent.
+
+    In reinforcement learning, an Agent learns by interacting with an Environment.
+    Usually, an agent tries to maximize a reward signal.
+    It does this by observing environment "states", taking "actions", receiving "rewards",
+    and in doing so, learning which state-action pairs correlate with high rewards.
+    An Agent implementation should encapsulate some particular reinforcement learning algorithm.
+    """
+
+    @abstractmethod
+    def act(self, state):
+        """
+        Select an action for the current timestep and update internal parameters.
+
+        In general, a reinforcement learning agent does several things during a timestep:
+
+        1. Choose an action
+        2. Compute the TD error from the previous timestep
+        3. Update the value function and/or policy
+
+        The order of these steps differs depending on the agent.
+        This method allows the agent to do whatever is necessary for itself on a given timestep.
+        However, the agent must ultimately return an action.
+
+        Args:
+            state (all.core.MultiAgentState): The environment state at the current timestep.
+
+        Returns:
+            torch.Tensor: The action to take at the current timestep.
+        """
independent.py (new file):

@@ -0,0 +1,9 @@
+from ._multiagent import Multiagent
+
+
+class IndependentMultiagent(Multiagent):
+    def __init__(self, agents):
+        self.agents = agents
+
+    def act(self, state):
+        return self.agents[state['agent']].act(state)
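IndependentMultiagent simply looks up `state['agent']` and delegates to the single-agent Agent stored under that name, so independent learners need no extra coordination logic. A hedged usage sketch follows; the agent names, the `ConstantAgent` stand-in, and the import path are hypothetical, not taken from this commit:

    from all.agents import IndependentMultiagent  # import path assumed


    class ConstantAgent:
        '''Trivial stand-in for a real single-agent Agent (hypothetical).'''
        def __init__(self, action):
            self.action = action

        def act(self, state):
            return self.action


    # One agent per player, keyed by the names the environment reports in
    # state['agent'] (PettingZoo-style names shown as an assumption).
    agents = {
        'first_0': ConstantAgent(0),
        'second_0': ConstantAgent(1),
    }
    multiagent = IndependentMultiagent(agents)

    # Each call routes the state to the agent whose turn it is:
    # multiagent.act(state) == agents[state['agent']].act(state)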
all/core/__init__.py:

@@ -1,3 +1,3 @@
-from .state import State, StateArray
+from .state import State, StateArray, MultiAgentState

-__all__ = ['State', 'StateArray']
+__all__ = ['State', 'StateArray', 'MultiAgentState']
all/environments/__init__.py:

@@ -1,5 +1,13 @@
-from .abstract import Environment
+from ._environment import Environment
+from ._multiagent_environment import MultiagentEnvironment
 from .gym import GymEnvironment
 from .atari import AtariEnvironment
+from .multiagent_atari import MultiagentAtariEnv

-__all__ = ["Environment", "GymEnvironment", "AtariEnvironment"]
+__all__ = [
+    "Environment",
+    "MultiagentEnvironment",
+    "GymEnvironment",
+    "AtariEnvironment",
+    "MultiagentAtariEnv",
+]
File renamed without changes: all/environments/abstract.py → all/environments/_environment.py.
all/environments/_multiagent_environment.py (new file):

@@ -0,0 +1,104 @@
+from abc import ABC, abstractmethod
+
+
+class MultiagentEnvironment(ABC):
+    '''
+    A multiagent reinforcement learning Environment.
+
+    The Multiagent variant of the Environment object.
+    An Environment defines the dynamics of a particular problem:
+    the states, the actions, the transitions between states, and the rewards given to the agent.
+    Environments are often used to benchmark reinforcement learning agents,
+    or to define real problems that the user hopes to solve using reinforcement learning.
+    '''
+
+    @abstractmethod
+    def reset(self):
+        '''
+        Reset the environment and return a new initial state for the first agent.
+
+        Returns:
+            all.core.MultiagentState: The initial state for the next episode.
+        '''
+
+    @abstractmethod
+    def step(self, action):
+        '''
+        Apply an action for the current agent and get the multiagent state for the next agent.
+
+        Parameters:
+            action: The Action for the current agent and timestep.
+
+        Returns:
+            all.core.MultiagentState: The state for the next agent.
+        '''
+
+    @abstractmethod
+    def render(self, **kwargs):
+        '''Render the current environment state.'''
+
+    @abstractmethod
+    def close(self):
+        '''Clean up any extraneous environment objects.'''
+
+    @abstractmethod
+    def agent_iter(self):
+        '''
+        Create an iterable whose next element is always the name of the agent whose turn it is to act.
+
+        Returns:
+            An Iterable over Agent strings.
+        '''
+
+    @abstractmethod
+    def last(self):
+        '''
+        Get the MultiagentState object for the current agent.
+
+        Returns:
+            The all.core.MultiagentState object for the current agent.
+        '''
+
+    @abstractmethod
+    def is_done(self, agent):
+        '''
+        Determine whether a given agent is done.
+
+        Args:
+            agent (str): The name of the agent.
+
+        Returns:
+            A boolean representing whether the given agent is done.
+        '''
+
+    @property
+    def state(self):
+        '''The State for the current agent.'''
+        return self.last()
+
+    @property
+    @abstractmethod
+    def name(self):
+        '''str: The name of the environment.'''
+
+    @property
+    @abstractmethod
+    def state_spaces(self):
+        '''A dictionary of state spaces for each agent.'''
+
+    @property
+    def observation_spaces(self):
+        '''Alias for MultiagentEnvironment.state_spaces.'''
+        return self.state_spaces
+
+    @property
+    @abstractmethod
+    def action_spaces(self):
+        '''A dictionary of action spaces for each agent.'''
+
+    @property
+    @abstractmethod
+    def device(self):
+        '''The torch device the environment lives on.'''
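Taken together, `agent_iter()`, `last()`, `step()`, and `is_done()` suggest a turn-based interaction loop in the style of PettingZoo's AEC API. Below is a hedged sketch of one episode; it assumes `agent_iter()` stops once every agent is done and that finished agents pass `None` to `step()`, neither of which is spelled out by the abstract class itself.

    def run_episode(env, multiagent):
        '''Drive one episode of a MultiagentEnvironment with a Multiagent (sketch only).'''
        env.reset()
        for agent in env.agent_iter():       # yields the name of the agent whose turn it is
            state = env.last()               # MultiagentState for that agent
            action = multiagent.act(state)   # the agent observes, learns, and chooses
            if env.is_done(agent):
                env.step(None)               # assumed convention for finished agents
            else:
                env.step(action)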