
Merge pull request #243 from cpnota/release/0.7.0
Release/0.7.0
cpnota committed Apr 12, 2021
2 parents 9c44d41 + f72b0e4 commit 411c2f9
Showing 173 changed files with 4,980 additions and 2,049 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/python-package.yml
@@ -0,0 +1,38 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python package

on:
push:
branches: [ master, develop ]
pull_request:
branches: [ master, develop ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
sudo apt-get install swig
sudo apt-get install unrar
pip install torch==1.8.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
make install
AutoROM -v
- name: Lint code
run: |
make lint
- name: Run tests
run: |
make test
31 changes: 31 additions & 0 deletions .github/workflows/python-publish.yml
@@ -0,0 +1,31 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

name: Upload Python Package

on:
release:
types: [created]

jobs:
deploy:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python setup.py sdist bdist_wheel
twine upload dist/*
5 changes: 5 additions & 0 deletions .gitignore
@@ -10,9 +10,14 @@ autonomous_learning_library.egg-info
# editor
.vscode
.idea
*.code-workspace

# non-committed code
local
legacy
/runs
/out

# notebooks
*.ipynb
*.ipynb_checkpoints
17 changes: 0 additions & 17 deletions .travis.yml

This file was deleted.

6 changes: 6 additions & 0 deletions CONTRIBUTING.md
@@ -26,6 +26,12 @@ The unit tests may be run using:
make test
```

You can automatically format your code to match our code style using:

```
make format
```

Finally, you can rebuild the documentation using:

```
14 changes: 11 additions & 3 deletions Makefile
@@ -1,12 +1,20 @@
install:
pip install -e .[dev]

lint:
pylint all --rcfile=.pylintrc
test: unit-test integration-test

test:
unit-test:
python -m unittest discover -s all -p "*test.py"

integration-test:
python -m unittest discover -s integration -p "*test.py"

lint:
flake8 --ignore "E501,E731,E74,E402,F401,W503,E128" all

format:
autopep8 --in-place --aggressive --aggressive --ignore "E501,E731,E74,E402,F401,W503,E128" -r all

tensorboard:
tensorboard --logdir runs

Binary file added all/.DS_Store
Binary file not shown.
24 changes: 23 additions & 1 deletion all/__init__.py
@@ -1,4 +1,26 @@
import all.agents
import all.approximation
import all.core
import all.environments
import all.logging
import all.memory
import all.nn
import all.optim
import all.policies
import all.presets
from all.core import State, StateArray

__all__ = ['nn', 'State', 'StateArray']
__all__ = [
'agents',
'approximation',
'core',
'environments',
'logging',
'memory',
'nn',
'optim',
'policies',
'presets',
'State',
'StateArray'
]
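
For reference, a quick sketch of what the reorganized top-level namespace exposes after this change; it only restates the new imports and `__all__` above:

```python
import all
from all import State, StateArray

# The subpackages imported in all/__init__.py are reachable as attributes,
# and State/StateArray are re-exported at the package's top level.
assert all.State is State and all.StateArray is StateArray
print(all.agents, all.presets)
```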
45 changes: 33 additions & 12 deletions all/agents/__init__.py
@@ -1,29 +1,50 @@
from ._agent import Agent
from .a2c import A2C
from .c51 import C51
from .ddpg import DDPG
from .ddqn import DDQN
from .dqn import DQN
from .ppo import PPO
from .rainbow import Rainbow
from .sac import SAC
from .vac import VAC
from .vpg import VPG
from .vqn import VQN
from .vsarsa import VSarsa
from ._multiagent import Multiagent
from ._parallel_agent import ParallelAgent
from .a2c import A2C, A2CTestAgent
from .c51 import C51, C51TestAgent
from .ddpg import DDPG, DDPGTestAgent
from .ddqn import DDQN, DDQNTestAgent
from .dqn import DQN, DQNTestAgent
from .independent import IndependentMultiagent
from .ppo import PPO, PPOTestAgent
from .rainbow import Rainbow, RainbowTestAgent
from .sac import SAC, SACTestAgent
from .vac import VAC, VACTestAgent
from .vpg import VPG, VPGTestAgent
from .vqn import VQN, VQNTestAgent
from .vsarsa import VSarsa, VSarsaTestAgent


__all__ = [
# Agent interfaces
"Agent",
"Multiagent",
"ParallelAgent",
# Agent implementations
"A2C",
"A2CTestAgent",
"C51",
"C51TestAgent",
"DDPG",
"DDPGTestAgent",
"DDQN",
"DDQNTestAgent",
"DQN",
"DQNTestAgent",
"PPO",
"PPOTestAgent",
"Rainbow",
"RainbowTestAgent",
"SAC",
"SACTestAgent",
"VAC",
"VACTestAgent",
"VPG",
"VPGTestAgent",
"VQN",
"VQNTestAgent",
"VSarsa",
"VSarsaTestAgent",
"IndependentMultiagent",
]
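
A brief note on the expanded agents namespace: each algorithm now exports a companion *TestAgent, and the Multiagent and ParallelAgent interfaces sit alongside Agent. A minimal sketch, only restating the exports above:

```python
from all.agents import Agent, ParallelAgent, Multiagent
from all.agents import DQN, DQNTestAgent, IndependentMultiagent

# Training agents and their test-mode counterparts are importable side by side.
print(Agent, ParallelAgent, Multiagent, DQN, DQNTestAgent, IndependentMultiagent)
```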
24 changes: 4 additions & 20 deletions all/agents/_agent.py
@@ -1,15 +1,16 @@
from abc import ABC, abstractmethod
from all.optim import Schedulable


class Agent(ABC, Schedulable):
"""
A reinforcement learning agent.
In reinforcement learning, an Agent learns by interacting with an Environment.
Usually, an agent tries to maximize a reward signal.
Usually, an Agent tries to maximize a reward signal.
It does this by observing environment "states", taking "actions", receiving "rewards",
and in doing so, learning which state-action pairs correlate with high rewards.
An Agent implementation should encapsulate some particular reinforcement learning algorihthm.
and learning which state-action pairs correlate with high rewards.
An Agent implementation should encapsulate some particular reinforcement learning algorithm.
"""

@abstractmethod
@@ -31,20 +32,3 @@ def act(self, state):
Returns:
torch.Tensor: The action to take at the current timestep.
"""

@abstractmethod
def eval(self, state):
"""
Select an action for the current timestep in evaluation mode.
Unlike act, this method should NOT update the internal parameters of the agent.
Most of the time, this method should return the greedy action according to the current policy.
This method is useful when using evaluation methodologies that distinguish between the performance
of the agent during training and the performance of the resulting policy.
Args:
state (all.environment.State): The environment state at the current timestep.
Returns:
torch.Tensor: The action to take at the current timestep.
"""
34 changes: 34 additions & 0 deletions all/agents/_multiagent.py
@@ -0,0 +1,34 @@
from abc import ABC, abstractmethod
from all.optim import Schedulable


class Multiagent(ABC, Schedulable):
"""
A multiagent RL agent. Differs from standard agents in that it accepts a multiagent state.
In reinforcement learning, an Agent learns by interacting with an Environment.
Usually, an agent tries to maximize a reward signal.
It does this by observing environment "states", taking "actions", receiving "rewards",
and learning which state-action pairs correlate with high rewards.
An Agent implementation should encapsulate some particular reinforcement learning algorithm.
"""

@abstractmethod
def act(self, multiagent_state):
"""
Select an action for the current timestep and update internal parameters.
In general, a reinforcement learning agent does several things during a timestep:
1. Choose an action,
2. Compute the TD error from the previous time step
3. Update the value function and/or policy
The order of these steps differs depending on the agent.
This method allows the agent to do whatever is necessary for itself on a given timestep.
However, the agent must ultimately return an action.
Args:
multiagent_state (all.core.MultiagentState): The environment state at the current timestep.
Returns:
torch.Tensor: The action for the current agent to take at the current timestep.
"""
36 changes: 36 additions & 0 deletions all/agents/_parallel_agent.py
@@ -0,0 +1,36 @@
from abc import ABC, abstractmethod
from all.optim import Schedulable


class ParallelAgent(ABC, Schedulable):
"""
A reinforcement learning agent that chooses actions for multiple states simultaneously.
Differs from SingleAgent in that it accepts a StateArray instead of a State to process
input from multiple environments in parallel.
In reinforcement learning, an Agent learns by interacting with an Environment.
Usually, an Agent tries to maximize a reward signal.
It does this by observing environment "states", taking "actions", receiving "rewards",
and learning which state-action pairs correlate with high rewards.
An Agent implementation should encapsulate some particular reinforcement learning algorithm.
"""

@abstractmethod
def act(self, state_array):
"""
Select an action for the current timestep and update internal parameters.
In general, a reinforcement learning agent does several things during a timestep:
1. Choose an action,
2. Compute the TD error from the previous time step
3. Update the value function and/or policy
The order of these steps differs depending on the agent.
This method allows the agent to do whatever is necessary for itself on a given timestep.
However, the agent must ultimately return an action.
Args:
state_array (all.environment.StateArray): An array of states for each parallel environment.
Returns:
torch.Tensor: The actions to take for each parallel environment.
"""
18 changes: 13 additions & 5 deletions all/agents/a2c.py
@@ -1,10 +1,12 @@
import torch
from torch.nn.functional import mse_loss
from all.logging import DummyWriter
from all.memory import NStepAdvantageBuffer
from ._agent import Agent
from ._parallel_agent import ParallelAgent


class A2C(Agent):
class A2C(ParallelAgent):
"""
Advantage Actor-Critic (A2C).
A2C is policy gradient method in the actor-critic family.
@@ -24,6 +26,7 @@ class A2C(Agent):
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
"""

def __init__(
self,
features,
@@ -60,9 +63,6 @@ def act(self, states):
self._actions = self.policy.no_grad(self.features.no_grad(states)).sample()
return self._actions

def eval(self, states):
return self.policy.eval(self.features.eval(states))

def _train(self, next_states):
if len(self._buffer) >= self._batch_size:
# load trajectories from buffer
@@ -99,4 +99,12 @@ def _make_buffer(self):
self.n_envs,
discount_factor=self.discount_factor
)



class A2CTestAgent(Agent):
def __init__(self, features, policy):
self.features = features
self.policy = policy

def act(self, state):
return self.policy.eval(self.features.eval(state)).sample()
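
A hedged sketch of how the new A2CTestAgent might be used for an evaluation rollout. `features`, `policy`, and `env` are placeholders for a trained feature network, a trained policy, and an all.environments environment; the `.reward` and `.done` accessors are assumptions about the State API rather than something shown in this diff:

```python
from all.agents import A2CTestAgent

test_agent = A2CTestAgent(features, policy)  # networks taken from a trained A2C

state = env.reset()
returns = 0.0
while not state.done:          # assumed State attribute
    action = test_agent.act(state)
    state = env.step(action)
    returns += state.reward    # assumed State attribute
print('episode return:', returns)
```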
