postprocessor.py
import logging
import os

import numpy as np

logging.basicConfig(filename='log/log.log', level=logging.DEBUG,
                    format='%(asctime)s %(levelname)s %(name)s %(message)s')
logger = logging.getLogger(os.path.basename(__file__))


class Postprocessor(object):
    """
    The Postprocessor creates a set of discrete commands based on
    the actions expected by the world. At each time step it translates
    the current set of commands into actions. All the actions it
    provides will be floats between zero and one.
    """
    def __init__(
            self,
            n_commands_per_action=2,
            n_actions=None,
    ):
        """
        Parameters
        ----------
        n_commands_per_action: int
            The number of discretized commands per raw action. This
            determines the resolution of the discretization.
        n_actions: int
            The number of actions that the world is expecting.
        """
        # TODO: Make discretization adaptive in number and magnitude.
        # TODO: Check for valid arguments.
        if not n_actions:
            logger.error('You have to give a number for n_actions.')
            raise ValueError('n_actions must be a positive integer.')
        self.n_actions = n_actions
        self.n_commands_per_action = n_commands_per_action
        self.n_commands = self.n_actions * self.n_commands_per_action
        # Keep a running record of recent internal values
        # for visualization.
        self.command_activities = np.zeros(self.n_commands)
        self.commands = np.zeros(self.n_commands)
        self.consolidated_commands = np.zeros(self.n_commands)
        self.previous_commands = np.zeros(self.n_commands)
        self.actions = np.zeros(self.n_actions)
        # The mapping helps to convert from discretized commands to
        # raw actions. Each row represents one raw action.
        self.mapping = (np.cumsum(np.ones(
            (self.n_actions, self.n_commands_per_action)), axis=1) /
            self.n_commands_per_action)
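        # For example, with n_actions=2 and n_commands_per_action=3,
        # mapping is
        #     [[1/3, 2/3, 1.],
        #      [1/3, 2/3, 1.]]
        # so successive commands for an action represent successively
        # larger magnitudes.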

    def convert_to_actions(self, command_activities):
        """
        Construct a set of actions from the command_activities.

        Parameters
        ----------
        command_activities: array of floats
            The likelihood that each of the discrete commands will be
            put into effect.

        Returns
        -------
        actions: array of floats
            A set of actions for the world, each between 0 and 1.
            The minimal set of commands that were actually implemented
            is stored in self.consolidated_commands; for each action,
            larger commands eclipse smaller ones.
        """
        self.command_activities = command_activities
        # Each command activity is between 0 and 1 and represents
        # the probability that the command will be put into effect.
        # First, roll the dice and see which commands are issued.
        # A separate random draw is made for each command so that
        # commands fire independently of one another.
        self.commands = np.zeros(self.n_commands)
        self.commands[np.random.random_sample(self.n_commands)
                      < self.command_activities] = 1
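        # For example, a command with an activity of .8 will fire on
        # roughly 80 percent of time steps.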
        # Find the magnitude of each commanded action.
        action_commands = self.mapping * np.reshape(
            self.commands, (self.n_actions, -1))
        # Only keep the largest command for each action.
        self.actions = np.max(action_commands, axis=1)
        # Find the discretized representation of the actions
        # that were finally issued. These are used (delayed by
        # one time step) to let the model know what it actually
        # did, so that it can learn an appropriate model.
        self.previous_commands = self.consolidated_commands
        self.consolidated_commands = np.zeros(self.n_commands)
        for i_action in range(self.n_actions):
            if self.actions[i_action] > 0:
                i_consolidated = (
                    i_action * self.n_commands_per_action
                    + np.where(action_commands[i_action, :] > 0)[0][-1])
                self.consolidated_commands[i_consolidated] = 1
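        # For example, with n_commands_per_action=2, if commands 2
        # and 3 (both belonging to action 1) fire together, only
        # command 3 is recorded here, because its magnitude (1.0)
        # eclipses that of command 2 (0.5).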
        # TODO: Consider adding fatigue.
        # It's not clear whether it will be helpful or not.
        return self.actions
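

# A minimal usage sketch, illustrative only and not part of the
# original module. The three-action world and the activity values
# below are hypothetical, and the module's logging config assumes
# a log/ directory exists.
if __name__ == '__main__':
    postprocessor = Postprocessor(n_commands_per_action=2, n_actions=3)
    # Hypothetical activities for the 6 commands (2 per action).
    activities = np.array([.9, .1, .2, .7, 0., 1.])
    actions = postprocessor.convert_to_actions(activities)
    print('actions:', actions)
    print('consolidated commands:', postprocessor.consolidated_commands)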