mode dependent controllers (#84)
aaspeel committed Dec 2, 2020
1 parent ead6b8a commit 9c0e2ef
Showing 2 changed files with 34 additions and 26 deletions.
8 changes: 6 additions & 2 deletions deer/agent.py
@@ -289,7 +289,6 @@ def _run_train(self, n_epochs, epoch_length):
i = 0
while i < n_epochs:
self._training_loss_averages = []

while epoch_length > 0: # run new episodes until the number of steps left for the epoch has reached 0
epoch_length = self._runEpisode(epoch_length)
i += 1
@@ -300,7 +299,7 @@ def _run_train(self, n_epochs, epoch_length):

def _run_non_train(self, n_epochs, epoch_length):
"""
This function runs a number of epochs in non train mode (id > -1), thus without controllers.
This function runs a number of epochs in non train mode (id > -1).
Parameters
-----------
@@ -309,13 +308,18 @@ def _run_non_train(self, n_epochs, epoch_length):
epoch_length : int
maximum number of steps for a given epoch
"""
for c in self._controllers: c.onStart(self)
i = 0
while i < n_epochs:
self._totalModeNbrEpisode=0
while epoch_length > 0:
self._totalModeNbrEpisode += 1
epoch_length = self._runEpisode(epoch_length)
i += 1
for c in self._controllers: c.onEpochEnd(self)

self._environment.end()
for c in self._controllers: c.onEnd(self)

def _runEpisode(self, maxSteps):
"""
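With this change, _run_non_train invokes the controller hooks (onStart, onEpochEnd, onEnd) just as _run_train does, so whether a controller reacts outside of training is decided by the mode check added to every hook in base_controllers.py below. As a minimal sketch (the class name and the mode id 2 are illustrative, not part of this commit), a custom controller opts into non-train modes by overriding the _modes list it inherits from Controller:

from deer.experiment.base_controllers import Controller

class EpochPrinter(Controller):
    """Illustrative controller that reports epoch ends in training mode (-1) and in mode 2."""

    def __init__(self, modes=[-1, 2]):
        super(EpochPrinter, self).__init__()  # sets self._active = True and self._modes = [-1]
        self._modes = modes                   # override the training-only default
        self._epoch_count = 0

    def onEpochEnd(self, agent):
        # Same gating pattern as the controllers modified in this commit.
        if (self._active == False) or (agent.mode() not in self._modes):
            return
        self._epoch_count += 1
        print("Epoch {} ended in mode {}".format(self._epoch_count, agent.mode()))
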
52 changes: 28 additions & 24 deletions deer/experiment/base_controllers.py
@@ -25,6 +25,7 @@ def __init__(self):
"""

self._active = True
self._modes = [-1]

def setActive(self, active):
"""Activate or deactivate this controller.
@@ -106,7 +107,7 @@ def onEnd(self, agent):


class LearningRateController(Controller):
"""A controller that modifies the learning rate periodically upon epochs end.
"""A controller that modifies the learning rate periodically upon epochs end (only in training mode, i.e., agent.mode() == -1).
Parameters
----------
@@ -130,15 +131,15 @@ def __init__(self, initial_learning_rate=0.005, learning_rate_decay=1., periodic
self._periodicity = periodicity

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count = 0
agent._learning_algo.setLearningRate(self._init_lr)
self._lr = self._init_lr * self._lr_decay

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count += 1
@@ -147,7 +148,7 @@ def onEpochEnd(self, agent):
self._lr *= self._lr_decay

class EpsilonController(Controller):
""" A controller that modifies the probability "epsilon" of taking a random action periodically.
""" A controller that modifies the probability "epsilon" of taking a random action periodically (only in training mode, i.e., agent.mode() == -1).
Parameters
----------
@@ -188,13 +189,13 @@ def __init__(self, initial_e=1., e_decays=10000, e_min=0.1, evaluate_on='action'
self._reset_on_epoch = 'epoch' == reset_every

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._reset(agent)

def onEpisodeEnd(self, agent, terminal_reached, reward):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._reset_on_episode:
@@ -203,7 +204,7 @@ def onEpisodeEnd(self, agent, terminal_reached, reward):
self._update(agent)

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._reset_on_epoch:
@@ -212,7 +213,7 @@ def onEpochEnd(self, agent):
self._update(agent)

def onActionChosen(self, agent, action):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_action:
@@ -232,7 +233,7 @@ def _update(self, agent):


class DiscountFactorController(Controller):
"""A controller that modifies the q-network discount periodically.
"""A controller that modifies the q-network discount periodically (only in training mode, i.e., agent.mode() == -1).
More informations in : Francois-Lavet Vincent et al. (2015) - How to Discount Deep Reinforcement Learning: Towards New Dynamic Strategies (http://arxiv.org/abs/1512.02011).
Parameters
@@ -261,7 +262,7 @@ def __init__(self, initial_discount_factor=0.9, discount_factor_growth=1., disco
self._periodicity = periodicity

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count = 0
@@ -272,7 +273,7 @@ def onStart(self, agent):
self._df = self._init_df

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count += 1
@@ -283,7 +284,7 @@ def onEpochEnd(self, agent):


class InterleavedTestEpochController(Controller):
"""A controller that interleaves a valid/test epoch between training epochs of the agent.
"""A controller that interleaves a valid/test epoch between training epochs of the agent (only in training mode, i.e., agent.mode() == -1).
Parameters
----------
@@ -321,13 +322,13 @@ def __init__(self, id=0, epoch_length=500, periodicity=1, show_score=True, summa
self.scores=[]

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._epoch_count = 0

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

mod = self._epoch_count % self._periodicity
@@ -347,7 +348,7 @@ def onEpochEnd(self, agent):


class TrainerController(Controller):
"""A controller that makes the agent train on its current database periodically.
"""A controller that makes the agent train on its current database periodically (only in training mode, i.e., agent.mode() == -1).
Parameters
----------
@@ -379,13 +380,13 @@ def __init__(self, evaluate_on='action', periodicity=1, show_episode_avg_V_value
self._on_action = True

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._count = 0

def onEpisodeEnd(self, agent, terminal_reached, reward):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_episode:
@@ -395,14 +396,14 @@ def onEpisodeEnd(self, agent, terminal_reached, reward):
if self._show_episode_avg_V_value: print("Episode average V value: {}".format(agent.avgEpisodeVValue())) # (on non-random action time-steps)

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_epoch:
self._update(agent)

def onActionTaken(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_action:
@@ -429,15 +430,18 @@ class VerboseController(Controller):
'action', 'episode', 'epoch'. The first printing will occur after the first occurence of [evaluateOn].
periodicity : int
How many [evaluateOn] are necessary before a printing occurs
modes : list of int
List of agent modes for which this controller is used
"""

def __init__(self, evaluateOn=False, evaluate_on='epoch', periodicity=1):
def __init__(self, evaluateOn=False, evaluate_on='epoch', periodicity=1, modes=[-1]):
"""Initializer.
"""
if evaluateOn is not False:
raise Exception('For uniformity the attributes to be provided to the controllers respect PEP8 from deer0.3dev1 onwards. For instance, instead of "evaluateOn", you should now have "evaluate_on". Please have a look at https://github.com/VinF/deer/issues/28.')

super(self.__class__, self).__init__()
self._modes = modes
self._count = 0
self._periodicity = periodicity
self._string = evaluate_on
@@ -449,27 +453,27 @@ def __init__(self, evaluateOn=False, evaluate_on='epoch', periodicity=1):
self._on_epoch = True

def onStart(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

self._count = 0

def onEpisodeEnd(self, agent, terminal_reached, reward):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_episode:
self._print(agent)

def onEpochEnd(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_epoch:
self._print(agent)

def onActionTaken(self, agent):
if (self._active == False):
if (self._active == False) or (agent.mode() not in self._modes):
return

if self._on_action:
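Note that only VerboseController gains a modes argument in this commit; the other controllers keep the default _modes = [-1] set in Controller.__init__ and therefore still act only while training (agent.mode() == -1). A hedged usage sketch, assuming an existing deer NeuralAgent with the attach() method used in the deer examples (that API is not shown in this diff):

import deer.experiment.base_controllers as bc

def attach_mode_aware_controllers(agent):
    # Acts only in training mode (-1): the default _modes inherited from Controller.
    agent.attach(bc.TrainerController(evaluate_on='action', periodicity=1))
    # Also prints its summary during a non-train mode, e.g. a test mode with id 2 (illustrative).
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1, modes=[-1, 2]))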
