Change linter to Flake8 (#183)
* flake8 compliance

* swap fix

* fix import structure issue

* fix 'not in' statements

* fix unused args in atari wrappers

* fix remaining flake8 errors

* change linter to pep8

* remove pylint disables

* trigger build

Co-authored-by: Justin Terry <justinkterry@gmail.com>
cpnota and jkterry1 committed Nov 4, 2020
1 parent db2d1da commit c37b6b9
Showing 95 changed files with 298 additions and 168 deletions.
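Most of the per-file changes below are mechanical flake8 fixes: inserting the two blank lines that E302 expects before top-level class and function definitions, rewriting membership tests flagged by E713 (the "fix 'not in' statements" commit), and removing pylint-specific disable comments. A small sketch of the flake8-clean form of those two patterns, for illustration only (not code from this repository):

# Illustrative sketch: E302 wants two blank lines before top-level definitions,
# and E713 wants "x not in y" rather than "not x in y".
from abc import ABC


class Agent(ABC):
    pass


def is_unknown(action, valid_actions):
    return action not in valid_actions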
9 changes: 6 additions & 3 deletions Makefile
@@ -1,12 +1,15 @@
 install:
 	pip install -e .[dev]
 
-lint:
-	pylint all --rcfile=.pylintrc
-
 test:
 	python -m unittest discover -s all -p "*test.py"
 
+lint:
+	flake8 --ignore "E501,E731,E74,E402,F401,W503,E128" all
+
+format:
+	autopep8 --in-place --aggressive --aggressive --ignore "E501,E731,E74,E402,F401,W503,E128" -r all
+
 tensorboard:
 	tensorboard --logdir runs

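For reference, the codes in the new --ignore list map roughly as follows under pycodestyle/pyflakes conventions (worth confirming against the flake8 docs): E501 line too long, E731 lambda assigned to a name, E402 module-level import not at the top of the file, F401 imported but unused, W503 line break before a binary operator, E128 continuation-line indentation; flake8 matches ignore entries by code prefix, so E74 covers the E741-E743 ambiguous-name checks. Because --ignore replaces flake8's default ignore list, checks such as W504 (line break after a binary operator) become active, which is presumably why several multi-line expressions below were joined onto single lines. A minimal sketch of code this configuration deliberately tolerates (illustrative, not from the repository):

import os  # F401: imported but unused -- suppressed by the --ignore list above
square = lambda x: x * x  # E731: lambda assigned to a name -- suppressed
GREETING = "a deliberately long line of text that would normally trigger E501 (line too long), which is also suppressed"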
Binary file added all/.DS_Store
1 change: 1 addition & 0 deletions all/agents/_agent.py
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod
from all.optim import Schedulable


class Agent(ABC, Schedulable):
"""
A reinforcement learning agent.
2 changes: 1 addition & 1 deletion all/agents/a2c.py
@@ -24,6 +24,7 @@ class A2C(Agent):
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
"""

def __init__(
self,
features,
@@ -99,4 +100,3 @@ def _make_buffer(self):
self.n_envs,
discount_factor=self.discount_factor
)

2 changes: 2 additions & 0 deletions all/agents/ddpg.py
@@ -3,6 +3,7 @@
from torch.nn.functional import mse_loss
from ._agent import Agent


class DDPG(Agent):
"""
Deep Deterministic Policy Gradient (DDPG).
@@ -26,6 +27,7 @@ class DDPG(Agent):
replay_start_size (int): Number of experiences in replay buffer when training begins.
update_frequency (int): Number of timesteps per training update.
"""

def __init__(self,
q,
policy,
1 change: 1 addition & 0 deletions all/agents/ddqn.py
@@ -24,6 +24,7 @@ class DDQN(Agent):
replay_start_size (int): Number of experiences in replay buffer when training begins.
update_frequency (int): Number of timesteps per training update.
'''

def __init__(self,
q,
policy,
4 changes: 2 additions & 2 deletions all/agents/dqn.py
@@ -25,6 +25,7 @@ class DQN(Agent):
replay_start_size (int): Number of experiences in replay buffer when training begins.
update_frequency (int): Number of timesteps per training update.
'''

def __init__(self,
q,
policy,
@@ -75,5 +76,4 @@ def _train(self):
 
     def _should_train(self):
         self._frames_seen += 1
-        return (self._frames_seen > self.replay_start_size and
-                self._frames_seen % self.update_frequency == 0)
+        return (self._frames_seen > self.replay_start_size and self._frames_seen % self.update_frequency == 0)
2 changes: 1 addition & 1 deletion all/agents/ppo.py
@@ -26,6 +26,7 @@ class PPO(Agent):
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
"""

def __init__(
self,
features,
@@ -138,4 +139,3 @@ def _make_buffer(self):
discount_factor=self.discount_factor,
lam=self.lam
)

1 change: 1 addition & 0 deletions all/agents/rainbow.py
@@ -1,5 +1,6 @@
from .c51 import C51


class Rainbow(C51):
"""
Rainbow: Combining Improvements in Deep Reinforcement Learning.
2 changes: 2 additions & 0 deletions all/agents/sac.py
@@ -3,6 +3,7 @@
from all.logging import DummyWriter
from ._agent import Agent


class SAC(Agent):
"""
Soft Actor-Critic (SAC).
@@ -28,6 +29,7 @@ class SAC(Agent):
temperature_initial (float): The initial temperature used in the maximum entropy objective.
update_frequency (int): Number of timesteps per training update.
"""

def __init__(self,
policy,
q_1,
1 change: 1 addition & 0 deletions all/agents/vac.py
@@ -19,6 +19,7 @@ class VAC(Agent):
n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
writer (Writer): Used for logging.
'''

def __init__(self, features, v, policy, discount_factor=1):
self.features = features
self.v = v
2 changes: 2 additions & 0 deletions all/agents/vpg.py
@@ -3,6 +3,7 @@
from all.core import State
from ._agent import Agent


class VPG(Agent):
'''
Vanilla Policy Gradient (VPG/REINFORCE).
@@ -24,6 +25,7 @@ class VPG(Agent):
this many state-action pairs are seen. Set this to a large value in order
to train on multiple episodes at once.
'''

def __init__(
self,
features,
1 change: 1 addition & 0 deletions all/agents/vqn.py
@@ -18,6 +18,7 @@ class VQN(Agent):
policy (GreedyPolicy): A policy derived from the Q-function.
discount_factor (float): Discount factor for future rewards.
'''

def __init__(self, q, policy, discount_factor=0.99):
self.q = q
self.policy = policy
1 change: 1 addition & 0 deletions all/agents/vsarsa.py
@@ -15,6 +15,7 @@ class VSarsa(Agent):
policy (GreedyPolicy): A policy derived from the Q-function.
discount_factor (float): Discount factor for future rewards.
'''

def __init__(self, q, policy, discount_factor=0.99):
self.q = q
self.policy = policy
Binary file added all/approximation/.DS_Store
2 changes: 2 additions & 0 deletions all/approximation/approximation.py
@@ -7,6 +7,7 @@

DEFAULT_CHECKPOINT_FREQUENCY = 200


class Approximation():
'''
Base function approximation object.
@@ -46,6 +47,7 @@ class Approximation():
The standard object logs to tensorboard, however, other types of Writer objects
may be implemented by the user.
'''

def __init__(
self,
model,
3 changes: 2 additions & 1 deletion all/approximation/feature_network.py
@@ -12,6 +12,7 @@ class FeatureNetwork(Approximation):
The reinforce() function will then backpropagate the accumulated gradients on the output
through the original computation graph.
'''

def __init__(self, model, optimizer=None, name='feature', **kwargs):
model = FeatureModule(model)
super().__init__(model, optimizer, name=name, **kwargs)
@@ -30,7 +31,6 @@ def __call__(self, states):
'''
features = self.model(states)
graphs = features.observation
# pylint: disable=protected-access
observation = graphs.detach()
observation.requires_grad = True
features['observation'] = observation
@@ -60,6 +60,7 @@ def _dequeue(self):
self._out = []
return torch.cat(graphs), torch.cat(grads)


class FeatureModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
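The FeatureNetwork fragment above shows the detach-and-reattach pattern described in its docstring: the feature output is detached, marked as requiring gradients, and handed to downstream heads, and reinforce() later pushes the accumulated gradients back through the original computation graph. A generic, self-contained sketch of that pattern (an illustration, not the library's code):

import torch

features_model = torch.nn.Linear(4, 8)
head = torch.nn.Linear(8, 2)

x = torch.randn(3, 4)
features = features_model(x)

# Detach so the downstream loss does not reach features_model immediately,
# but keep a leaf tensor that records the incoming gradients.
detached = features.detach()
detached.requires_grad = True

loss = head(detached).sum()
loss.backward()  # gradients accumulate on `detached`, not on features_model

# Later, push the accumulated gradients back through the original graph.
features.backward(detached.grad)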
2 changes: 2 additions & 0 deletions all/approximation/q_continuous.py
@@ -2,6 +2,7 @@
from all.nn import RLNetwork
from .approximation import Approximation


class QContinuous(Approximation):
def __init__(
self,
@@ -18,6 +19,7 @@ def __init__(
**kwargs
)


class QContinuousModule(RLNetwork):
def forward(self, states, actions):
x = torch.cat((states.observation.float(), actions), dim=1)
1 change: 0 additions & 1 deletion all/approximation/q_dist.py
@@ -23,7 +23,6 @@
super().__init__(model, optimizer, name=name, **kwargs)

def project(self, dist, support):
# pylint: disable=invalid-name
target_dist = dist * 0
atoms = self.atoms
v_min = atoms[0]
49 changes: 25 additions & 24 deletions all/approximation/q_dist_test.py
@@ -159,24 +159,24 @@ def test_project_dist(self):
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030]
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030]
])
expected = torch.tensor([
[0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 0.0202, 0.0202, 0.0199, 0.0202,
@@ -229,24 +229,24 @@ def test_project_dist_cuda(self):
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030],
[-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862,
-6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812,
-3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762,
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030]
-0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287,
2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337,
5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386,
8.9268, 9.3149, 9.7030]
]).cuda()
expected = torch.tensor([
[0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 0.0202, 0.0202, 0.0199, 0.0202,
@@ -270,5 +270,6 @@
])
tt.assert_almost_equal(q.project(dist, support).cpu(), expected.cpu(), decimal=3)


if __name__ == "__main__":
unittest.main()
2 changes: 2 additions & 0 deletions all/approximation/q_network.py
@@ -2,6 +2,7 @@
from all.nn import RLNetwork
from .approximation import Approximation


class QNetwork(Approximation):
def __init__(
self,
@@ -18,6 +19,7 @@ def __init__(
**kwargs
)


class QModule(RLNetwork):
def forward(self, states, actions=None):
values = super().forward(states)
4 changes: 3 additions & 1 deletion all/approximation/q_network_test.py
@@ -10,12 +10,14 @@
STATE_DIM = 2
ACTIONS = 3


class TestQNetwork(unittest.TestCase):
def setUp(self):
torch.manual_seed(2)
self.model = nn.Sequential(
nn.Linear(STATE_DIM, ACTIONS)
)

def optimizer(params):
return torch.optim.SGD(params, lr=0.1)
self.q = QNetwork(self.model, optimizer)
@@ -46,7 +48,6 @@ def test_eval_actions(self):
self.assertEqual(result.shape, torch.Size([3]))
tt.assert_almost_equal(result, torch.tensor([-0.7262873, 0.3484948, -0.0296164]))


def test_target_net(self):
torch.manual_seed(2)
model = nn.Sequential(
@@ -93,5 +94,6 @@ def loss(policy_value):
np.testing.assert_equal(policy_value.item(), -0.8085841536521912)
np.testing.assert_equal(target_value, -0.6085841655731201)


if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion all/approximation/target/abstract.py
@@ -1,6 +1,6 @@
from abc import abstractmethod, ABC

# pylint: disable=arguments-differ

class TargetNetwork(ABC):
@abstractmethod
def __call__(self, *inputs):
1 change: 1 addition & 0 deletions all/approximation/target/fixed.py
@@ -2,6 +2,7 @@
import torch
from .abstract import TargetNetwork


class FixedTarget(TargetNetwork):
def __init__(self, update_frequency):
self._source = None
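For context, FixedTarget appears to implement the standard fixed-target-network pattern used by DQN-style agents: bootstrap values come from a copy of the online network that is re-synced only every update_frequency steps. A generic, self-contained sketch of the idea (an illustration of the technique under that assumption, not the library's exact API):

import copy

import torch


class SimpleFixedTarget:
    """Serve predictions from a copy of `model` that is re-synced every `update_frequency` calls to update()."""

    def __init__(self, model, update_frequency):
        self._source = model
        self._target = copy.deepcopy(model)
        self._update_frequency = update_frequency
        self._updates = 0

    def __call__(self, *inputs):
        # Target predictions are used only as regression targets, so no gradients are needed.
        with torch.no_grad():
            return self._target(*inputs)

    def update(self):
        self._updates += 1
        if self._updates % self._update_frequency == 0:
            self._target.load_state_dict(self._source.state_dict())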
7 changes: 3 additions & 4 deletions all/approximation/target/polyak.py
@@ -2,8 +2,10 @@
import torch
from .abstract import TargetNetwork


class PolyakTarget(TargetNetwork):
'''TargetNetwork that updates using polyak averaging'''

def __init__(self, rate):
self._source = None
self._target = None
@@ -19,7 +21,4 @@ def init(self, model):
 
     def update(self):
         for target_param, source_param in zip(self._target.parameters(), self._source.parameters()):
-            target_param.data.copy_(
-                target_param.data * (1.0 - self._rate) +
-                source_param.data * self._rate
-            )
+            target_param.data.copy_(target_param.data * (1.0 - self._rate) + source_param.data * self._rate)
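The joined line above is the Polyak (soft) target update, target <- (1 - rate) * target + rate * source. A self-contained sketch of the same rule as a standalone helper, assuming two torch modules with matching parameters (illustrative; the library wraps this logic in the PolyakTarget class shown above):

import torch


def soft_update(target, source, rate):
    # Polyak averaging: move each target parameter a small step toward the corresponding source parameter.
    with torch.no_grad():
        for target_param, source_param in zip(target.parameters(), source.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - rate) + source_param.data * rate)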
1 change: 1 addition & 0 deletions all/approximation/target/trivial.py
@@ -1,6 +1,7 @@
import torch
from .abstract import TargetNetwork


class TrivialTarget(TargetNetwork):
def __init__(self):
self._model = None
