// Copyright (c) 2020, The Emergent Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"fmt"
	"math/rand"

	"github.com/emer/emergent/env"
	"github.com/emer/emergent/erand"
	"github.com/emer/emergent/evec"
	"github.com/emer/emergent/patgen"
	"github.com/emer/etable/etensor"
)

// Approach implements CS-guided approach to desired outcomes.
// Each location contains a US which satisfies a different drive.
type Approach struct {

	// name of environment -- Train or Test
	Nm string `desc:"name of environment -- Train or Test"`

	// number of different drive-like body states (hunger, thirst, etc), that are satisfied by a corresponding US outcome
	Drives int `desc:"number of different drive-like body states (hunger, thirst, etc), that are satisfied by a corresponding US outcome"`

	// number of different CS sensory cues associated with each US (simplest case is 1 -- one-to-one mapping), presented on a fovea input layer
	CSPerDrive int `desc:"number of different CS sensory cues associated with each US (simplest case is 1 -- one-to-one mapping), presented on a fovea input layer"`

	// number of different locations -- always <= number of drives -- drives have a unique location
	Locations int `desc:"number of different locations -- always <= number of drives -- drives have a unique location"`

	// maximum distance in time steps to reach the US
	DistMax int `desc:"maximum distance in time steps to reach the US"`

	// maximum number of time steps before resetting
	TimeMax int `desc:"maximum number of time steps before resetting"`

	// interval in trials for generating a new state, only if > 0
	NewStateInt int `desc:"interval in trials for generating a new state, only if > 0"`

	// total number of CS's = Drives * CSPerDrive
	CSTot int `desc:"total number of CS's = Drives * CSPerDrive"`

	// number of Y-axis repetitions of localist stimuli -- for redundancy in spiking nets
	NYReps int `desc:"number of Y-axis repetitions of localist stimuli -- for redundancy in spiking nets"`

	// size of CS patterns
	PatSize evec.Vec2i `desc:"size of CS patterns"`

	// list of actions
	Acts []string `desc:"list of actions"`

	// action map of action names to indexes
	ActMap map[string]int `desc:"action map of action names to indexes"`

	// named states -- e.g., USs, CSs, etc
	States map[string]*etensor.Float32 `desc:"named states -- e.g., USs, CSs, etc"`

	// target position where Drive US is
	TrgPos int `desc:"target position where Drive US is"`

	// current drive state
	Drive int `desc:"current drive state"`

	// current distance
	Dist int `desc:"current distance"`

	// current time, counting up until starting over
	Time int `desc:"current time, counting up until starting over"`

	// current position being looked at
	Pos int `desc:"current position being looked at"`

	// reward
	Rew float32 `desc:"reward"`

	// US is -1 unless consumed at Dist = 0
	US int `desc:"US is -1 unless consumed at Dist = 0"`

	// count up for generating a new state
	StateCtr int `desc:"count up for generating a new state"`

	// last action taken
	LastAct int `desc:"last action taken"`
}
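
// Name returns the name of the environment (Train or Test).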
func (ev *Approach) Name() string {
	return ev.Nm
}
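
// Desc returns a brief description of the environment.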
func (ev *Approach) Desc() string {
	return "Approach"
}

// Defaults sets default params
func (ev *Approach) Defaults() {
	ev.Acts = []string{"Forward", "Left", "Right", "Consume"}
	ev.Drives = 4
	ev.CSPerDrive = 1
	ev.Locations = 4 // <= drives always
	ev.DistMax = 4
	ev.TimeMax = 10
	ev.NewStateInt = -1
	ev.NYReps = 4
	ev.PatSize.Set(6, 6)
	// ev.PopCode.Defaults()
	// ev.PopCode.SetRange(-0.2, 1.2, 0.1)
}

// Config configures the world
func (ev *Approach) Config() {
	ev.CSTot = ev.Drives * ev.CSPerDrive
	ev.ActMap = make(map[string]int)
	for i, act := range ev.Acts {
		ev.ActMap[act] = i
	}
	ev.States = make(map[string]*etensor.Float32)
	ev.States["USs"] = etensor.NewFloat32([]int{ev.Locations}, nil, nil)
	ev.States["CSs"] = etensor.NewFloat32([]int{ev.Locations}, nil, nil)
	ev.States["Pos"] = etensor.NewFloat32([]int{ev.NYReps, ev.Locations}, nil, nil)
	ev.States["Drives"] = etensor.NewFloat32([]int{ev.NYReps, ev.Drives}, nil, nil)
	ev.States["US"] = etensor.NewFloat32([]int{ev.NYReps, ev.Drives + 1}, nil, nil)
	ev.States["CS"] = etensor.NewFloat32([]int{ev.PatSize.Y, ev.PatSize.X}, nil, nil)
	ev.States["Dist"] = etensor.NewFloat32([]int{ev.NYReps, ev.DistMax}, nil, nil)
	ev.States["Time"] = etensor.NewFloat32([]int{ev.NYReps, ev.TimeMax}, nil, nil)
	ev.States["Rew"] = etensor.NewFloat32([]int{1, 1}, nil, nil)
	ev.States["Action"] = etensor.NewFloat32([]int{1, len(ev.Acts)}, nil, nil)
	ev.ConfigPats()
	ev.NewState()
}

// ConfigPats generates patterns for CS's
func (ev *Approach) ConfigPats() {
	pats := etensor.NewFloat32([]int{ev.CSTot, ev.PatSize.Y, ev.PatSize.X}, nil, nil)
	patgen.PermutedBinaryMinDiff(pats, 6, 1, 0, 3)
	ev.States["Pats"] = pats
}
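
// Validate checks the environment configuration -- currently always returns nil.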
func (ev *Approach) Validate() error {
	return nil
}
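
// Init initializes the environment for the given run by reconfiguring all state.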
func (ev *Approach) Init(run int) {
	ev.Config()
}
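
// Counter satisfies the env.Env interface -- counters are not used in this environment.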
func (ev *Approach) Counter(scale env.TimeScales) (cur, prv int, changed bool) {
	return 0, 0, false
}
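
// State returns the named state tensor.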
func (ev *Approach) State(el string) etensor.Tensor {
	return ev.States[el]
}

// NewState configures a new set of USs in locations
func (ev *Approach) NewState() {
	uss := ev.States["USs"]
	css := ev.States["CSs"]
	drives := rand.Perm(ev.Drives)
	for l := 0; l < ev.Locations; l++ {
		us := drives[l]
		cs := rand.Intn(ev.CSPerDrive)
		pat := us*ev.CSPerDrive + cs
		uss.Values[l] = float32(us)
		css.Values[l] = float32(pat)
	}
	ev.StateCtr = 0
	ev.NewStart()
}

// PatToUS returns the US index and CS index from the overall pattern index
func (ev *Approach) PatToUS(pat int) (us, cs int) {
	us = pat / ev.CSPerDrive
	cs = pat % ev.CSPerDrive
	return
}

// NewStart starts a new approach run
func (ev *Approach) NewStart() {
	ev.StateCtr++
	if ev.NewStateInt > 0 && ev.StateCtr >= ev.NewStateInt {
		ev.NewState()
	}
	ev.Dist = 1 + rand.Intn(ev.DistMax-1)
	ev.Time = 0
	ev.Pos = rand.Intn(ev.Locations)
	ev.TrgPos = rand.Intn(ev.Locations)
	uss := ev.States["USs"]
	ev.Drive = int(uss.Values[ev.TrgPos])
	ev.US = -1
	ev.Rew = 0
	ev.RenderState()
	ev.RenderRewUS()
}

// RenderLocalist renders one localist state
func (ev *Approach) RenderLocalist(name string, val int) {
	st := ev.States[name]
	st.SetZeros()
	for y := 0; y < ev.NYReps; y++ {
		st.Set([]int{y, val}, 1.0)
	}
}

// RenderState renders the current state
func (ev *Approach) RenderState() {
	ev.RenderLocalist("Pos", ev.Pos)
	ev.RenderLocalist("Drives", ev.Drive)
	ev.RenderLocalist("Dist", ev.Dist)
	ev.RenderLocalist("Time", ev.Time)
	css := ev.States["CSs"]
	patn := int(css.Values[ev.Pos])
	pats := ev.States["Pats"]
	pat := pats.SubSpace([]int{patn})
	cs := ev.States["CS"]
	cs.CopyFrom(pat)
}

// RenderRewUS renders reward and US
func (ev *Approach) RenderRewUS() {
	if ev.US < 0 {
		ev.RenderLocalist("US", ev.Drives)
	} else {
		ev.RenderLocalist("US", ev.US)
	}
	rew := ev.States["Rew"]
	rew.Values[0] = ev.Rew
}

// RenderAction renders the action
func (ev *Approach) RenderAction(act int) {
	as := ev.States["Action"]
	as.SetZeros()
	as.Values[act] = 1
}

// Step does one step -- it starts a new approach run if the previous one
// has ended (US consumed or time limit reached), then renders the current state.
func (ev *Approach) Step() bool {
	if ev.Dist < 0 || ev.Time >= ev.TimeMax {
		ev.NewStart()
	}
	ev.RenderState()
	ev.Rew = 0
	ev.US = -1
	ev.RenderRewUS()
	return true
}
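
// DecodeAct decodes the action with the strongest activity in the given tensor
// (argmax), returning its index and name.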
func (ev *Approach) DecodeAct(vt *etensor.Float32) (int, string) {
	var max float32
	var mxi int
	for i, vl := range vt.Values {
		if vl > max {
			max = vl
			mxi = i
		}
	}
	return mxi, ev.Acts[mxi]
}
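
// Action applies the named action: Forward decreases the distance, Left / Right
// move to the adjacent location (wrapping around), and Consume delivers the US at
// the current location when Dist == 0, with reward if it matches the current drive.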
func (ev *Approach) Action(action string, nop etensor.Tensor) {
	act, ok := ev.ActMap[action]
	if !ok {
		fmt.Printf("Action not recognized: %s\n", action)
		return
	}
	ev.RenderAction(act)
	ev.Time++
	uss := ev.States["USs"]
	us := int(uss.Values[ev.Pos])
	switch action {
	case "Forward":
		ev.Dist--
	case "Left":
		ev.Pos--
		if ev.Pos < 0 {
			ev.Pos += ev.Locations
		}
	case "Right":
		ev.Pos++
		if ev.Pos >= ev.Locations {
			ev.Pos -= ev.Locations
		}
	case "Consume":
		if ev.Dist == 0 {
			if us == ev.Drive {
				ev.Rew = 1
			}
			ev.US = us
			ev.Dist--
		}
	}
	ev.LastAct = act
	ev.RenderRewUS()
}

// ActGen returns an "instinctive" action that implements a basic policy
func (ev *Approach) ActGen() int {
	uss := ev.States["USs"]
	posUs := int(uss.Values[ev.Pos])
	if posUs == ev.Drive {
		if ev.Dist == 0 {
			return ev.ActMap["Consume"]
		}
		return ev.ActMap["Forward"]
	}
	lt := ev.ActMap["Left"]
	rt := ev.ActMap["Right"]
	if ev.LastAct == lt || ev.LastAct == rt {
		return ev.LastAct
	}
	if erand.BoolP(.5, -1) {
		return lt
	}
	return rt
}
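
// Usage sketch (not part of the original file; the Train name and step count are
// illustrative assumptions): a caller such as a simulation loop could drive the
// environment roughly as follows, using only the methods defined above:
//
//	ev := &Approach{Nm: "Train"}
//	ev.Defaults()
//	ev.Config()
//	for step := 0; step < 100; step++ {
//		ev.Step()                    // render state, start a new run if needed
//		act := ev.ActGen()           // instinctive policy action
//		ev.Action(ev.Acts[act], nil) // apply the action by name
//	}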