mode dependent controllers (#84)
aaspeel committed Dec 2, 2020
1 parent ead6b8a commit 9c0e2ef
Showing 2 changed files with 34 additions and 26 deletions.
8 changes: 6 additions & 2 deletions deer/agent.py
@@ -289,7 +289,6 @@ def _run_train(self, n_epochs, epoch_length):
i = 0
while i < n_epochs:
self._training_loss_averages = []

while epoch_length > 0: # run new episodes until the number of steps left for the epoch has reached 0
epoch_length = self._runEpisode(epoch_length)
i += 1
@@ -300,7 +299,7 @@ def _run_train(self, n_epochs, epoch_length):

def _run_non_train(self, n_epochs, epoch_length):
"""
This function runs a number of epochs in non train mode (id > -1), thus without controllers.
This function runs a number of epochs in non train mode (id > -1).
Parameters
-----------
@@ -309,13 +308,18 @@ def _run_non_train(self, n_epochs, epoch_length):
epoch_length : int
maximum number of steps for a given epoch
"""
for c in self._controllers: c.onStart(self)
i = 0
while i < n_epochs:
self._totalModeNbrEpisode=0
while epoch_length > 0:
self._totalModeNbrEpisode += 1
epoch_length = self._runEpisode(epoch_length)
i += 1
for c in self._controllers: c.onEpochEnd(self)

self._environment.end()
for c in self._controllers: c.onEnd(self)

def _runEpisode(self, maxSteps):
"""
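With this change, _run_non_train invokes the controller hooks (onStart, onEpochEnd, onEnd) just as _run_train does, so whether a controller reacts outside of training is decided by the mode check added to every hook in base_controllers.py below. As a minimal sketch (the class name and the mode id 2 are illustrative, not part of this commit), a custom controller opts into non-train modes by overriding the _modes list it inherits from Controller:

from deer.experiment.base_controllers import Controller

class EpochPrinter(Controller):
    """Illustrative controller that reports epoch ends in training mode (-1) and in mode 2."""

    def __init__(self, modes=[-1, 2]):
        super(EpochPrinter, self).__init__()  # sets self._active = True and self._modes = [-1]
        self._modes = modes                   # override the training-only default
        self._epoch_count = 0

    def onEpochEnd(self, agent):
        # Same gating pattern as the controllers modified in this commit.
        if (self._active == False) or (agent.mode() not in self._modes):
            return
        self._epoch_count += 1
        print("Epoch {} ended in mode {}".format(self._epoch_count, agent.mode()))
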
52 changes: 28 additions & 24 deletions deer/experiment/base_controllers.py
@@ -25,6 +25,7 @@ def __init__(self):
"""

self._active = True
self._modes = [-1]

def setActive(self, active):
"""Activate or deactivate this controller.
@@ -106,7 +107,7 @@ def onEnd(self, agent):


class LearningRateController(Controller):
"""A controller that modifies the learning rate periodically upon epochs end.
"""A controller that modifies the learning rate periodically upon epochs end (only in training mode, i.e., agent.mode() == -1).
Parameters
----------
@@ -130,15 +131,15 @@ def __init__(self, initial_learning_rate=0.005, learning_rate_decay=1., periodic
self._periodicity = periodicity

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count = 0
agent._learning_algo.setLearningRate(self._init_lr)
self._lr = self._init_lr * self._lr_decay

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count += 1
@@ -147,7 +148,7 @@ def onEpochEnd(self, agent):
self._lr *= self._lr_decay

class EpsilonController(Controller):
""" A controller that modifies the probability "epsilon" of taking a random action periodically.
""" A controller that modifies the probability "epsilon" of taking a random action periodically (only in training mode, i.e., agent.mode() == -1).
Parameters
----------
@@ -188,13 +189,13 @@ def __init__(self, initial_e=1., e_decays=10000, e_min=0.1, evaluate_on='action'
self._reset_on_epoch = 'epoch' == reset_every

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._reset(agent)

def onEpisodeEnd(self, agent, terminal_reached, reward):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._reset_on_episode:
@@ -203,7 +204,7 @@ def onEpisodeEnd(self, agent, terminal_reached, reward):
self._update(agent)

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._reset_on_epoch:
@@ -212,7 +213,7 @@ def onEpochEnd(self, agent):
self._update(agent)

def onActionChosen(self, agent, action):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_action:
@@ -232,7 +233,7 @@ def _update(self, agent):


class DiscountFactorController(Controller):
"""A controller that modifies the q-network discount periodically.
"""A controller that modifies the q-network discount periodically (only in training mode, i.e., agent.mode() == -1).
More informations in : Francois-Lavet Vincent et al. (2015) - How to Discount Deep Reinforcement Learning: Towards New Dynamic Strategies (http://arxiv.org/abs/1512.02011).
Parameters
@@ -261,7 +262,7 @@ def __init__(self, initial_discount_factor=0.9, discount_factor_growth=1., disco
self._periodicity = periodicity

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count = 0
@@ -272,7 +273,7 @@ def onStart(self, agent):
self._df = self._init_df

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count += 1
@@ -283,7 +284,7 @@ def onEpochEnd(self, agent):


class InterleavedTestEpochController(Controller):
"""A controller that interleaves a valid/test epoch between training epochs of the agent.
"""A controller that interleaves a valid/test epoch between training epochs of the agent (only in training mode, i.e., agent.mode() == -1).
Parameters
----------
@@ -321,13 +322,13 @@ def __init__(self, id=0, epoch_length=500, periodicity=1, show_score=True, summa
self.scores=[]

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count = 0

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

mod = self._epoch_count % self._periodicity
@@ -347,7 +348,7 @@ def onEpochEnd(self, agent):


class TrainerController(Controller):
"""A controller that makes the agent train on its current database periodically.
"""A controller that makes the agent train on its current database periodically (only in training mode, i.e., agent.mode() == -1).
Parameters
----------
@@ -379,13 +380,13 @@ def __init__(self, evaluate_on='action', periodicity=1, show_episode_avg_V_value
self._on_action = True

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._count = 0

def onEpisodeEnd(self, agent, terminal_reached, reward):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_episode:
@@ -395,14 +396,14 @@ def onEpisodeEnd(self, agent, terminal_reached, reward):
if self._show_episode_avg_V_value: print("Episode average V value: {}".format(agent.avgEpisodeVValue())) # (on non-random action time-steps)

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_epoch:
self._update(agent)

def onActionTaken(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_action:
@@ -429,15 +430,18 @@ class VerboseController(Controller):
'action', 'episode', 'epoch'. The first printing will occur after the first occurence of [evaluateOn].
periodicity : int
How many [evaluateOn] are necessary before a printing occurs
modes : list of int
List of agent modes for which this controller is used
"""

def __init__(self, evaluateOn=False, evaluate_on='epoch', periodicity=1):
def __init__(self, evaluateOn=False, evaluate_on='epoch', periodicity=1, modes=[-1]):
"""Initializer.
"""
if evaluateOn is not False:
raise Exception('For uniformity the attributes to be provided to the controllers respect PEP8 from deer0.3dev1 onwards. For instance, instead of "evaluateOn", you should now have "evaluate_on". Please have a look at https://github.com/VinF/deer/issues/28.')

super(self.__class__, self).__init__()
self._modes = modes
self._count = 0
self._periodicity = periodicity
self._string = evaluate_on
@@ -449,27 +453,27 @@ def __init__(self, evaluateOn=False, evaluate_on='epoch', periodicity=1):
self._on_epoch = True

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._count = 0

def onEpisodeEnd(self, agent, terminal_reached, reward):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_episode:
self._print(agent)

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_epoch:
self._print(agent)

def onActionTaken(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_action:
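Note that only VerboseController gains a modes argument in this commit; the other controllers keep the default _modes = [-1] set in Controller.__init__ and therefore still act only while training (agent.mode() == -1). A hedged usage sketch, assuming an existing deer NeuralAgent with the attach() method used in the deer examples (that API is not shown in this diff):

import deer.experiment.base_controllers as bc

def attach_mode_aware_controllers(agent):
    # Acts only in training mode (-1): the default _modes inherited from Controller.
    agent.attach(bc.TrainerController(evaluate_on='action', periodicity=1))
    # Also prints its summary during a non-train mode, e.g. a test mode with id 2 (illustrative).
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1, modes=[-1, 2]))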
