
Merge pull request #243 from cpnota/release/0.7.0
Release/0.7.0
cpnota committed Apr 12, 2021
2 parents 9c44d41 + f72b0e4 commit 411c2f9
Showing 173 changed files with 4,980 additions and 2,049 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/python-package.yml
@@ -0,0 +1,38 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python package

on:
push:
branches: [ master, develop ]
pull_request:
branches: [ master, develop ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
sudo apt-get install swig
sudo apt-get install unrar
pip install torch==1.8.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
make install
AutoROM -v
- name: Lint code
run: |
make lint
- name: Run tests
run: |
make test
31 changes: 31 additions & 0 deletions .github/workflows/python-publish.yml
@@ -0,0 +1,31 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

name: Upload Python Package

on:
release:
types: [created]

jobs:
deploy:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python setup.py sdist bdist_wheel
twine upload dist/*
5 changes: 5 additions & 0 deletions .gitignore
@@ -10,9 +10,14 @@ autonomous_learning_library.egg-info
# editor
.vscode
.idea
*.code-workspace

# non-committed code
local
legacy
/runs
/out

# notebooks
*.ipynb
*.ipynb_checkpoints
17 changes: 0 additions & 17 deletions .travis.yml

This file was deleted.

6 changes: 6 additions & 0 deletions CONTRIBUTING.md
@@ -26,6 +26,12 @@ The unit tests may be run using:
make test
```

You can automatically format your code to match our code style using:

```
make format
```

Finally, you can rebuild the documentation using:

```
14 changes: 11 additions & 3 deletions Makefile
@@ -1,12 +1,20 @@
install:
pip install -e .[dev]

lint:
pylint all --rcfile=.pylintrc
test: unit-test integration-test

test:
unit-test:
python -m unittest discover -s all -p "*test.py"

integration-test:
python -m unittest discover -s integration -p "*test.py"

lint:
flake8 --ignore "E501,E731,E74,E402,F401,W503,E128" all

format:
autopep8 --in-place --aggressive --aggressive --ignore "E501,E731,E74,E402,F401,W503,E128" -r all

tensorboard:
tensorboard --logdir runs

Binary file added all/.DS_Store
Binary file not shown.
24 changes: 23 additions & 1 deletion all/__init__.py
@@ -1,4 +1,26 @@
import all.agents
import all.approximation
import all.core
import all.environments
import all.logging
import all.memory
import all.nn
import all.optim
import all.policies
import all.presets
from all.core import State, StateArray

__all__ = ['nn', 'State', 'StateArray']
__all__ = [
'agents',
'approximation',
'core',
'environments',
'logging',
'memory',
'nn',
'optim',
'policies',
'presets',
'State',
'StateArray'
]
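
For reference, a quick sketch of what the reorganized top-level namespace exposes after this change; it only restates the new imports and `__all__` above:

```python
import all
from all import State, StateArray

# The subpackages imported in all/__init__.py are reachable as attributes,
# and State/StateArray are re-exported at the package's top level.
assert all.State is State and all.StateArray is StateArray
print(all.agents, all.presets)
```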
45 changes: 33 additions & 12 deletions all/agents/__init__.py
@@ -1,29 +1,50 @@
from ._agent import Agent
from .a2c import A2C
from .c51 import C51
from .ddpg import DDPG
from .ddqn import DDQN
from .dqn import DQN
from .ppo import PPO
from .rainbow import Rainbow
from .sac import SAC
from .vac import VAC
from .vpg import VPG
from .vqn import VQN
from .vsarsa import VSarsa
from ._multiagent import Multiagent
from ._parallel_agent import ParallelAgent
from .a2c import A2C, A2CTestAgent
from .c51 import C51, C51TestAgent
from .ddpg import DDPG, DDPGTestAgent
from .ddqn import DDQN, DDQNTestAgent
from .dqn import DQN, DQNTestAgent
from .independent import IndependentMultiagent
from .ppo import PPO, PPOTestAgent
from .rainbow import Rainbow, RainbowTestAgent
from .sac import SAC, SACTestAgent
from .vac import VAC, VACTestAgent
from .vpg import VPG, VPGTestAgent
from .vqn import VQN, VQNTestAgent
from .vsarsa import VSarsa, VSarsaTestAgent


__all__ = [
# Agent interfaces
"Agent",
"Multiagent",
"ParallelAgent",
# Agent implementations
"A2C",
"A2CTestAgent",
"C51",
"C51TestAgent",
"DDPG",
"DDPGTestAgent",
"DDQN",
"DDQNTestAgent",
"DQN",
"DQNTestAgent",
"PPO",
"PPOTestAgent",
"Rainbow",
"RainbowTestAgent",
"SAC",
"SACTestAgent",
"VAC",
"VACTestAgent",
"VPG",
"VPGTestAgent",
"VQN",
"VQNTestAgent",
"VSarsa",
"VSarsaTestAgent",
"IndependentMultiagent",
]
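
A brief note on the expanded agents namespace: each algorithm now exports a companion *TestAgent, and the Multiagent and ParallelAgent interfaces sit alongside Agent. A minimal sketch, only restating the exports above:

```python
from all.agents import Agent, ParallelAgent, Multiagent
from all.agents import DQN, DQNTestAgent, IndependentMultiagent

# Training agents and their test-mode counterparts are importable side by side.
print(Agent, ParallelAgent, Multiagent, DQN, DQNTestAgent, IndependentMultiagent)
```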
24 changes: 4 additions & 20 deletions all/agents/_agent.py
@@ -1,15 +1,16 @@
from abc import ABC, abstractmethod
from all.optim import Schedulable


class Agent(ABC, Schedulable):
"""
A reinforcement learning agent.
In reinforcement learning, an Agent learns by interacting with an Environment.
Usually, an agent tries to maximize a reward signal.
Usually, an Agent tries to maximize a reward signal.
It does this by observing environment "states", taking "actions", receiving "rewards",
and in doing so, learning which state-action pairs correlate with high rewards.
An Agent implementation should encapsulate some particular reinforcement learning algorihthm.
and learning which state-action pairs correlate with high rewards.
An Agent implementation should encapsulate some particular reinforcement learning algorithm.
"""

@abstractmethod
@@ -31,20 +32,3 @@ def act(self, state):
Returns:
torch.Tensor: The action to take at the current timestep.
"""

@abstractmethod
def eval(self, state):
"""
Select an action for the current timestep in evaluation mode.
Unlike act, this method should NOT update the internal parameters of the agent.
Most of the time, this method should return the greedy action according to the current policy.
This method is useful when using evaluation methodologies that distinguish between the performance
of the agent during training and the performance of the resulting policy.
Args:
state (all.environment.State): The environment state at the current timestep.
Returns:
torch.Tensor: The action to take at the current timestep.
"""
34 changes: 34 additions & 0 deletions all/agents/_multiagent.py
@@ -0,0 +1,34 @@
from abc import ABC, abstractmethod
from all.optim import Schedulable


class Multiagent(ABC, Schedulable):
"""
A multiagent RL agent. Differs from standard agents in that it accepts a multiagent state.
In reinforcement learning, an Agent learns by interacting with an Environment.
Usually, an agent tries to maximize a reward signal.
It does this by observing environment "states", taking "actions", receiving "rewards",
and learning which state-action pairs correlate with high rewards.
An Agent implementation should encapsulate some particular reinforcement learning algorithm.
"""

@abstractmethod
def act(self, multiagent_state):
"""
Select an action for the current timestep and update internal parameters.
In general, a reinforcement learning agent does several things during a timestep:
1. Choose an action,
2. Compute the TD error from the previous time step
3. Update the value function and/or policy
The order of these steps differs depending on the agent.
This method allows the agent to do whatever is necessary for itself on a given timestep.
However, the agent must ultimately return an action.
Args:
multiagent_state (all.core.MultiagentState): The environment state at the current timestep.
Returns:
torch.Tensor: The action for the current agent to take at the current timestep.
"""
36 changes: 36 additions & 0 deletions all/agents/_parallel_agent.py
@@ -0,0 +1,36 @@
from abc import ABC, abstractmethod
from all.optim import Schedulable


class ParallelAgent(ABC, Schedulable):
"""
A reinforcement learning agent that chooses actions for multiple states simultaneously.
Differs from SingleAgent in that it accepts a StateArray instead of a State to process
input from multiple environments in parallel.
In reinforcement learning, an Agent learns by interacting with an Environment.
Usually, an Agent tries to maximize a reward signal.
It does this by observing environment "states", taking "actions", receiving "rewards",
and learning which state-action pairs correlate with high rewards.
An Agent implementation should encapsulate some particular reinforcement learning algorithm.
"""

@abstractmethod
def act(self, state_array):
"""
Select an action for the current timestep and update internal parameters.
In general, a reinforcement learning agent does several things during a timestep:
1. Choose an action,
2. Compute the TD error from the previous time step
3. Update the value function and/or policy
The order of these steps differs depending on the agent.
This method allows the agent to do whatever is necessary for itself on a given timestep.
However, the agent must ultimately return an action.
Args:
state_array (all.environment.StateArray): An array of states for each parallel environment.
Returns:
torch.Tensor: The actions to take for each parallel environment.
"""
18 changes: 13 additions & 5 deletions all/agents/a2c.py
@@ -1,10 +1,12 @@
import torch
from torch.nn.functional import mse_loss
from all.logging import DummyWriter
from all.memory import NStepAdvantageBuffer
from ._agent import Agent
from ._parallel_agent import ParallelAgent


class A2C(Agent):
class A2C(ParallelAgent):
"""
Advantage Actor-Critic (A2C).
A2C is policy gradient method in the actor-critic family.
@@ -24,6 +26,7 @@ class A2C(Agent):
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
"""

def __init__(
self,
features,
@@ -60,9 +63,6 @@ def act(self, states):
self._actions = self.policy.no_grad(self.features.no_grad(states)).sample()
return self._actions

def eval(self, states):
return self.policy.eval(self.features.eval(states))

def _train(self, next_states):
if len(self._buffer) >= self._batch_size:
# load trajectories from buffer
@@ -99,4 +99,12 @@ def _make_buffer(self):
self.n_envs,
discount_factor=self.discount_factor
)



class A2CTestAgent(Agent):
def __init__(self, features, policy):
self.features = features
self.policy = policy

def act(self, state):
return self.policy.eval(self.features.eval(state)).sample()
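
A hedged sketch of how the new A2CTestAgent might be used for an evaluation rollout. `features`, `policy`, and `env` are placeholders for a trained feature network, a trained policy, and an all.environments environment; the `.reward` and `.done` accessors are assumptions about the State API rather than something shown in this diff:

```python
from all.agents import A2CTestAgent

test_agent = A2CTestAgent(features, policy)  # networks taken from a trained A2C

state = env.reset()
returns = 0.0
while not state.done:          # assumed State attribute
    action = test_agent.act(state)
    state = env.step(action)
    returns += state.reward    # assumed State attribute
print('episode return:', returns)
```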
