Change linter to Flake8 (#183)
* flake8 compliance

* swap fix

* fix import structure issue

* fix 'not in' statements

* fix unused args in atari wrappers

* fix remaining flake8 errors

* change linter to pep8

* remove pylint disables

* trigger build

Co-authored-by: Justin Terry <justinkterry@gmail.com>
cpnota and jkterry1 committed Nov 4, 2020
1 parent db2d1da commit c37b6b9
Showing 95 changed files with 298 additions and 168 deletions.
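Most of the per-file changes below are mechanical flake8 fixes: inserting the two blank lines that E302 expects before top-level class and function definitions, rewriting membership tests flagged by E713 (the "fix 'not in' statements" commit), and removing pylint-specific disable comments. A small sketch of the flake8-clean form of those two patterns, for illustration only (not code from this repository):

# Illustrative sketch: E302 wants two blank lines before top-level definitions,
# and E713 wants "x not in y" rather than "not x in y".
from abc import ABC


class Agent(ABC):
    pass


def is_unknown(action, valid_actions):
    return action not in valid_actions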
9 changes: 6 additions & 3 deletions Makefile
@@ -1,12 +1,15 @@
 install:
 	pip install -e .[dev]
 
-lint:
-	pylint all --rcfile=.pylintrc
-
 test:
 	python -m unittest discover -s all -p "*test.py"
 
+lint:
+	flake8 --ignore "E501,E731,E74,E402,F401,W503,E128" all
+
+format:
+	autopep8 --in-place --aggressive --aggressive --ignore "E501,E731,E74,E402,F401,W503,E128" -r all
+
 tensorboard:
 	tensorboard --logdir runs

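For reference, the codes in the new --ignore list map roughly as follows under pycodestyle/pyflakes conventions (worth confirming against the flake8 docs): E501 line too long, E731 lambda assigned to a name, E402 module-level import not at the top of the file, F401 imported but unused, W503 line break before a binary operator, E128 continuation-line indentation; flake8 matches ignore entries by code prefix, so E74 covers the E741-E743 ambiguous-name checks. Because --ignore replaces flake8's default ignore list, checks such as W504 (line break after a binary operator) become active, which is presumably why several multi-line expressions below were joined onto single lines. A minimal sketch of code this configuration deliberately tolerates (illustrative, not from the repository):

import os  # F401: imported but unused -- suppressed by the --ignore list above
square = lambda x: x * x  # E731: lambda assigned to a name -- suppressed
GREETING = "a deliberately long line of text that would normally trigger E501 (line too long), which is also suppressed"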
Binary file added all/.DS_Store
1 change: 1 addition & 0 deletions all/agents/_agent.py
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod
from all.optim import Schedulable


class Agent(ABC, Schedulable):
"""
A reinforcement learning agent.
2 changes: 1 addition & 1 deletion all/agents/a2c.py
@@ -24,6 +24,7 @@ class A2C(Agent):
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
"""

def __init__(
self,
features,
@@ -99,4 +100,3 @@ def _make_buffer(self):
self.n_envs,
discount_factor=self.discount_factor
)

2 changes: 2 additions & 0 deletions all/agents/ddpg.py
@@ -3,6 +3,7 @@
from torch.nn.functional import mse_loss
from ._agent import Agent


class DDPG(Agent):
"""
Deep Deterministic Policy Gradient (DDPG).
@@ -26,6 +27,7 @@ class DDPG(Agent):
replay_start_size (int): Number of experiences in replay buffer when training begins.
update_frequency (int): Number of timesteps per training update.
"""

def __init__(self,
q,
policy,
1 change: 1 addition & 0 deletions all/agents/ddqn.py
@@ -24,6 +24,7 @@ class DDQN(Agent):
replay_start_size (int): Number of experiences in replay buffer when training begins.
update_frequency (int): Number of timesteps per training update.
'''

def __init__(self,
q,
policy,
4 changes: 2 additions & 2 deletions all/agents/dqn.py
@@ -25,6 +25,7 @@ class DQN(Agent):
replay_start_size (int): Number of experiences in replay buffer when training begins.
update_frequency (int): Number of timesteps per training update.
'''

def __init__(self,
q,
policy,
@@ -75,5 +76,4 @@ def _train(self):
 
     def _should_train(self):
         self._frames_seen += 1
-        return (self._frames_seen > self.replay_start_size and
-                self._frames_seen % self.update_frequency == 0)
+        return (self._frames_seen > self.replay_start_size and self._frames_seen % self.update_frequency == 0)
2 changes: 1 addition & 1 deletion all/agents/ppo.py
@@ -26,6 +26,7 @@ class PPO(Agent):
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
"""

def __init__(
self,
features,
@@ -138,4 +139,3 @@ def _make_buffer(self):
discount_factor=self.discount_factor,
lam=self.lam
)

1 change: 1 addition & 0 deletions all/agents/rainbow.py
@@ -1,5 +1,6 @@
from .c51 import C51


class Rainbow(C51):
"""
Rainbow: Combining Improvements in Deep Reinforcement Learning.
2 changes: 2 additions & 0 deletions all/agents/sac.py
@@ -3,6 +3,7 @@
from all.logging import DummyWriter
from ._agent import Agent


class SAC(Agent):
"""
Soft Actor-Critic (SAC).
@@ -28,6 +29,7 @@ class SAC(Agent):
temperature_initial (float): The initial temperature used in the maximum entropy objective.
update_frequency (int): Number of timesteps per training update.
"""

def __init__(self,
policy,
q_1,
1 change: 1 addition & 0 deletions all/agents/vac.py
@@ -19,6 +19,7 @@ class VAC(Agent):
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
'''

def __init__(self, features, v, policy, discount_factor=1):
self.features = features
self.v = v
2 changes: 2 additions & 0 deletions all/agents/vpg.py
@@ -3,6 +3,7 @@
from all.core import State
from ._agent import Agent


class VPG(Agent):
'''
Vanilla Policy Gradient (VPG/REINFORCE).
@@ -24,6 +25,7 @@ class VPG(Agent):
this many state-action pairs are seen. Set this to a large value in order
to train on multiple episodes at once.
'''

def __init__(
self,
features,
1 change: 1 addition & 0 deletions all/agents/vqn.py
@@ -18,6 +18,7 @@ class VQN(Agent):
policy (GreedyPolicy): A policy derived from the Q-function.
discount_factor (float): Discount factor for future rewards.
'''

def __init__(self, q, policy, discount_factor=0.99):
self.q = q
self.policy = policy
1 change: 1 addition & 0 deletions all/agents/vsarsa.py
@@ -15,6 +15,7 @@ class VSarsa(Agent):
policy (GreedyPolicy): A policy derived from the Q-function.
discount_factor (float): Discount factor for future rewards.
'''

def __init__(self, q, policy, discount_factor=0.99):
self.q = q
self.policy = policy
Binary file added all/approximation/.DS_Store
2 changes: 2 additions & 0 deletions all/approximation/approximation.py
@@ -7,6 +7,7 @@

DEFAULT_CHECKPOINT_FREQUENCY = 200


class Approximation():
'''
Base function approximation object.
@@ -46,6 +47,7 @@ class Approximation():
The standard object logs to tensorboard, however, other types of Writer objects
may be implemented by the user.
'''

def __init__(
self,
model,
3 changes: 2 additions & 1 deletion all/approximation/feature_network.py
@@ -12,6 +12,7 @@ class FeatureNetwork(Approximation):
The reinforce() function will then backpropagate the accumulated gradients on the output
through the original computation graph.
'''

def __init__(self, model, optimizer=None, name='feature', **kwargs):
model = FeatureModule(model)
super().__init__(model, optimizer, name=name, **kwargs)
@@ -30,7 +31,6 @@ def __call__(self, states):
'''
features = self.model(states)
graphs = features.observation
# pylint: disable=protected-access
observation = graphs.detach()
observation.requires_grad = True
features['observation'] = observation
@@ -60,6 +60,7 @@ def _dequeue(self):
self._out = []
return torch.cat(graphs), torch.cat(grads)


class FeatureModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
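The FeatureNetwork fragment above shows the detach-and-reattach pattern described in its docstring: the feature output is detached, marked as requiring gradients, and handed to downstream heads, and reinforce() later pushes the accumulated gradients back through the original computation graph. A generic, self-contained sketch of that pattern (an illustration, not the library's code):

import torch

features_model = torch.nn.Linear(4, 8)
head = torch.nn.Linear(8, 2)

x = torch.randn(3, 4)
features = features_model(x)

# Detach so the downstream loss does not reach features_model immediately,
# but keep a leaf tensor that records the incoming gradients.
detached = features.detach()
detached.requires_grad = True

loss = head(detached).sum()
loss.backward()  # gradients accumulate on `detached`, not on features_model

# Later, push the accumulated gradients back through the original graph.
features.backward(detached.grad)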
2 changes: 2 additions & 0 deletions all/approximation/q_continuous.py
@@ -2,6 +2,7 @@
from all.nn import RLNetwork
from .approximation import Approximation


class QContinuous(Approximation):
def __init__(
self,
@@ -18,6 +19,7 @@ def __init__(
**kwargs
)


class QContinuousModule(RLNetwork):
def forward(self, states, actions):
x = torch.cat((states.observation.float(), actions), dim=1)
1 change: 0 additions & 1 deletion all/approximation/q_dist.py
@@ -23,7 +23,6 @@
super().__init__(model, optimizer, name=name, **kwargs)

def project(self, dist, support):
# pylint: disable=invalid-name
target_dist = dist * 0
atoms = self.atoms
v_min = atoms[0]
49 changes: 25 additions & 24 deletions all/approximation/q_dist_test.py
@@ -159,24 +159,24 @@ def test_project_dist(self):
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030]
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030]
])
expected = torch.tensor([
[0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 0.0202, 0.0202, 0.0199, 0.0202,
@@ -229,24 +229,24 @@ def test_project_dist_cuda(self):
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030]
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030]
]).cuda()
expected = torch.tensor([
[0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 0.0202, 0.0202, 0.0199, 0.0202,
@@ -270,5 +270,6 @@
])
tt.assert_almost_equal(q.project(dist, support).cpu(), expected.cpu(), decimal=3)


if __name__ == "__main__":
unittest.main()
2 changes: 2 additions & 0 deletions all/approximation/q_network.py
@@ -2,6 +2,7 @@
from all.nn import RLNetwork
from .approximation import Approximation


class QNetwork(Approximation):
def __init__(
self,
@@ -18,6 +19,7 @@ def __init__(
**kwargs
)


class QModule(RLNetwork):
def forward(self, states, actions=None):
values = super().forward(states)
4 changes: 3 additions & 1 deletion all/approximation/q_network_test.py
@@ -10,12 +10,14 @@
STATE_DIM = 2
ACTIONS = 3


class TestQNetwork(unittest.TestCase):
def setUp(self):
torch.manual_seed(2)
self.model = nn.Sequential(
nn.Linear(STATE_DIM, ACTIONS)
)

def optimizer(params):
return torch.optim.SGD(params, lr=0.1)
self.q = QNetwork(self.model, optimizer)
@@ -46,7 +48,6 @@ def test_eval_actions(self):
self.assertEqual(result.shape, torch.Size([3]))
tt.assert_almost_equal(result, torch.tensor([-0.7262873, 0.3484948, -0.0296164]))


def test_target_net(self):
torch.manual_seed(2)
model = nn.Sequential(
@@ -93,5 +94,6 @@ def loss(policy_value):
np.testing.assert_equal(policy_value.item(), -0.8085841536521912)
np.testing.assert_equal(target_value, -0.6085841655731201)


if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion all/approximation/target/abstract.py
@@ -1,6 +1,6 @@
from abc import abstractmethod, ABC

# pylint: disable=arguments-differ

class TargetNetwork(ABC):
@abstractmethod
def __call__(self, *inputs):
1 change: 1 addition & 0 deletions all/approximation/target/fixed.py
@@ -2,6 +2,7 @@
import torch
from .abstract import TargetNetwork


class FixedTarget(TargetNetwork):
def __init__(self, update_frequency):
self._source = None
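For context, FixedTarget appears to implement the standard fixed-target-network pattern used by DQN-style agents: bootstrap values come from a copy of the online network that is re-synced only every update_frequency steps. A generic, self-contained sketch of the idea (an illustration of the technique under that assumption, not the library's exact API):

import copy

import torch


class SimpleFixedTarget:
    """Serve predictions from a copy of `model` that is re-synced every `update_frequency` calls to update()."""

    def __init__(self, model, update_frequency):
        self._source = model
        self._target = copy.deepcopy(model)
        self._update_frequency = update_frequency
        self._updates = 0

    def __call__(self, *inputs):
        # Target predictions are used only as regression targets, so no gradients are needed.
        with torch.no_grad():
            return self._target(*inputs)

    def update(self):
        self._updates += 1
        if self._updates % self._update_frequency == 0:
            self._target.load_state_dict(self._source.state_dict())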
7 changes: 3 additions & 4 deletions all/approximation/target/polyak.py
@@ -2,8 +2,10 @@
import torch
from .abstract import TargetNetwork


class PolyakTarget(TargetNetwork):
'''TargetNetwork that updates using polyak averaging'''

def __init__(self, rate):
self._source = None
self._target = None
@@ -19,7 +21,4 @@ def init(self, model):
 
     def update(self):
         for target_param, source_param in zip(self._target.parameters(), self._source.parameters()):
-            target_param.data.copy_(
-                target_param.data * (1.0 - self._rate) +
-                source_param.data * self._rate
-            )
+            target_param.data.copy_(target_param.data * (1.0 - self._rate) + source_param.data * self._rate)
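The joined line above is the Polyak (soft) target update, target <- (1 - rate) * target + rate * source. A self-contained sketch of the same rule as a standalone helper, assuming two torch modules with matching parameters (illustrative; the library wraps this logic in the PolyakTarget class shown above):

import torch


def soft_update(target, source, rate):
    # Polyak averaging: move each target parameter a small step toward the corresponding source parameter.
    with torch.no_grad():
        for target_param, source_param in zip(target.parameters(), source.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - rate) + source_param.data * rate)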
1 change: 1 addition & 0 deletions all/approximation/target/trivial.py
@@ -1,6 +1,7 @@
import torch
from .abstract import TargetNetwork


class TrivialTarget(TargetNetwork):
def __init__(self):
self._model = None
