/
globals.go
235 lines (165 loc) · 8.64 KB
/
globals.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
// Copyright (c) 2023, The Emergent Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package axon
import "github.com/goki/ki/kit"
//go:generate stringer -type=GlobalVars
// KiT_GlobalVars holds the result of adding the GlobalVars enum to kit.Enums
// at package initialization, using GlobalVarsN as the enum's N value and
// kit.NotBitFlag to indicate that the values are not bit flags.
// NOTE(review): presumably this registration is what lets the kit enum JSON
// helpers used by MarshalJSON / UnmarshalJSON resolve names -- confirm against
// the kit package documentation.
var KiT_GlobalVars = kit.Enums.AddEnum(GlobalVarsN, kit.NotBitFlag, nil)
// MarshalJSON satisfies the json.Marshaler method signature for GlobalVars,
// handing the value to kit.EnumMarshalJSON and returning its result unchanged.
func (ev GlobalVars) MarshalJSON() ([]byte, error) {
	return kit.EnumMarshalJSON(ev)
}
// UnmarshalJSON satisfies the json.Unmarshaler method signature for GlobalVars,
// passing the receiver pointer and the raw bytes b to kit.EnumUnmarshalJSON
// and returning its error unchanged.
func (ev *GlobalVars) UnmarshalJSON(b []byte) error {
	return kit.EnumUnmarshalJSON(ev, b)
}
//gosl: start globals
// GlobalVars are network-wide variables, such as neuromodulators, reward,
// drives, etc, including the state for the PVLV phasic dopamine model.
// It is an int32-based enum: each Gv* constant names one such variable,
// and GlobalVarsN is the total number of them.
type GlobalVars int32
// The GlobalVars values are assigned sequentially by iota, starting at 0
// with GvRew and ending with the GlobalVarsN count. Reordering, inserting,
// or removing entries changes the numeric value of every entry after it.
const (

/////////////////////////////////////////
// Reward

// Rew is reward value -- this is set here in the Context struct, and the RL Rew layer grabs it from there -- must also set HasRew flag when rew is set -- otherwise is ignored.
GvRew GlobalVars = iota
// HasRew must be set to true when a reward is present -- otherwise Rew is ignored. Also set when PVLV BOA model gives up. This drives ACh release in the PVLV model.
GvHasRew
// RewPred is reward prediction -- computed by a special reward prediction layer
GvRewPred
// PrevPred is previous time step reward prediction -- e.g., for TDPredLayer
GvPrevPred
// HadRew is HasRew state from the previous trial -- copied from HasRew in NewState -- used for updating Effort, Urgency at start of new trial
GvHadRew

/////////////////////////////////////////
// NeuroMod neuromodulators

// DA is dopamine -- represents reward prediction error, signaled as phasic increases or decreases in activity relative to a tonic baseline, which is represented by a value of 0. Released by the VTA -- ventral tegmental area, or SNc -- substantia nigra pars compacta.
GvDA
// ACh is acetylcholine -- activated by salient events, particularly at the onset of a reward / punishment outcome (US), or onset of a conditioned stimulus (CS). Driven by BLA -> PPtg that detects changes in BLA activity, via LDTLayer type
GvACh
// NE is norepinephrine -- not yet in use
GvNE
// Ser is serotonin -- not yet in use
GvSer
// AChRaw is raw ACh value used in updating global ACh value by LDTLayer
GvAChRaw
// NotMaint is activity of the PTNotMaintLayer -- drives top-down inhibition of LDT layer / ACh activity.
GvNotMaint

/////////////////////////////////////////
// VSMatrix gating and PVLV Rew flags

// VSMatrixJustGated is VSMatrix just gated (to engage goal maintenance in PFC areas), set at end of plus phase -- this excludes any gating happening at time of US
GvVSMatrixJustGated
// VSMatrixHasGated is VSMatrix has gated since the last time HasRew was set (US outcome received or expected one failed to be received)
GvVSMatrixHasGated
// CuriosityPoolGated is true if VSMatrixJustGated and the first pool representing the curiosity / novelty drive gated -- this can change the giving up Effort.Max parameter.
GvCuriosityPoolGated

/////////////////////////////////////////
// Time, Effort & Urgency

// Time is raw time counter, incrementing upward during goal engaged window.
// This is also copied directly into NegUS[0] which tracks time, but we maintain
// a separate time value to make it clearer.
GvTime
// Effort is raw effort counter -- incrementing upward for each effort step
// during goal engaged window.
// This is also copied directly into NegUS[1] which tracks effort, but we maintain
// a separate effort value to make it clearer.
GvEffort
// UrgencyRaw is raw effort for urgency -- incrementing upward from effort
// increments per step when _not_ goal engaged
GvUrgencyRaw
// Urgency is the overall urgency activity level (normalized 0-1),
// computed from logistic function of GvUrgencyRaw
GvUrgency

/////////////////////////////////////////
// US / PV

// HasPosUS indicates has positive US on this trial -- drives goal accomplishment logic
// and gating.
GvHasPosUS
// HadPosUS is state from the previous trial (copied from HasPosUS in NewState).
GvHadPosUS
// NegUSOutcome indicates that a strong negative US stimulus was experienced,
// driving phasic ACh, VSMatrix gating to reset current goal engaged plan (if any),
// and phasic dopamine based on the outcome.
GvNegUSOutcome
// HadNegUSOutcome is state from the previous trial (copied from NegUSOutcome in NewState)
GvHadNegUSOutcome
// PVposSum is total weighted positive valence primary value = sum of Weight * USpos * Drive
GvPVposSum
// PVpos is normalized positive valence primary value = (1 - 1/(1+PVposGain * PVposSum))
GvPVpos
// PVnegSum is total weighted negative valence primary value = sum of Weight * USneg
GvPVnegSum
// PVneg is normalized negative valence primary value = (1 - 1/(1+PVnegGain * PVnegSum))
GvPVneg
// PVposEst is the estimated PVpos value based on OFCposUSPT and VSMatrix gating
GvPVposEst
// PVposEstSum is the sum that goes into computing estimated PVpos
// value based on OFCposUSPT and VSMatrix gating
GvPVposEstSum
// PVposEstDisc is the discounted version of PVposEst, subtracting VSPatchPosSum,
// which represents the accumulated expectation of PVpos to this point.
GvPVposEstDisc
// GiveUpDiff is the difference: PVposEstDisc - PVneg representing the
// expected positive outcome up to this point. When this turns negative,
// the chance of giving up goes up proportionally, as a logistic
// function of this difference.
GvGiveUpDiff
// GiveUpProb is the probability from the logistic function of GiveUpDiff
GvGiveUpProb
// GiveUp is true if a reset was triggered probabilistically based on GiveUpProb
GvGiveUp
// GaveUp is copy of GiveUp from previous trial
GvGaveUp

/////////////////////////////////////////
// VSPatch prediction of PVpos net value

// VSPatchPos is net shunting input from VSPatch (PosD1, named PVi in original PVLV)
// computed as the Max of US-specific VSPatch saved values.
// This is also stored as GvRewPred.
GvVSPatchPos
// VSPatchPosPrev is the previous-trial version of VSPatchPos -- for adjusting the
// VSPatchThr threshold
GvVSPatchPosPrev
// VSPatchPosSum is the sum of VSPatchPos over goal engaged trials,
// representing the integrated prediction that the US is going to occur
GvVSPatchPosSum

/////////////////////////////////////////
// LHb lateral habenula component of the PVLV model -- does all US processing

// LHbDip is computed LHb activity level that drives dipping / pausing of DA firing,
// when VSPatch pos prediction > actual PV reward drive
// or PVneg > PVpos
GvLHbDip
// LHbBurst is computed LHb activity level that drives bursts of DA firing, when actual PV reward drive > VSPatch pos prediction
GvLHbBurst
// LHbPVDA is GvLHbBurst - GvLHbDip -- the LHb contribution to DA, reflecting PV and VSPatch (PVi), but not the CS (LV) contributions
GvLHbPVDA

/////////////////////////////////////////
// Amygdala CS / LV variables

// CeMpos is positive valence central nucleus of the amygdala (CeM) LV (learned value) activity, reflecting |BLAPosAcqD1 - BLAPosExtD2|_+ positively rectified. CeM sets Raw directly. Note that a positive US onset even with no active Drive will be reflected here, enabling learning about unexpected outcomes
GvCeMpos
// CeMneg is negative valence central nucleus of the amygdala (CeM) LV (learned value) activity, reflecting |BLANegAcqD2 - BLANegExtD1|_+ positively rectified. CeM sets Raw directly
GvCeMneg

/////////////////////////////////////////
// VTA ventral tegmental area dopamine release

// VtaDA is overall dopamine value reflecting all of the different inputs
GvVtaDA

/////////////////////////////////////////
// USneg: negative valence US -- allocated for Nitems

// USneg are negative valence US outcomes -- normalized version of raw,
// NNegUSs of them
GvUSneg
// USnegRaw are raw, linearly incremented negative valence US outcomes,
// this value is also integrated together with all US vals for PVneg
GvUSnegRaw

///////////////////////////////////////////////////////////
// USpos, VSPatch

// Drives is current drive state -- updated with optional homeostatic exponential return to baseline values
GvDrives
// USpos is current positive-valence drive-satisfying input(s) (unconditioned stimuli = US)
GvUSpos
// VSPatch is current reward predicting VSPatch (PosD1) values
GvVSPatch
// VSPatchPrev is previous reward predicting VSPatch (PosD1) values
GvVSPatchPrev
// OFCposUSPTMaint is activity level of given OFCposUSPT maintenance pool
// used in anticipating potential USpos outcome value
GvOFCposUSPTMaint
// VSMatrixPoolGated indicates whether given VSMatrix pool gated
// this is reset after last goal accomplished -- records gating since then.
GvVSMatrixPoolGated

// GlobalVarsN is the number of GlobalVars values defined above; it is the
// last entry in the iota sequence and is the count passed to kit.Enums.AddEnum.
GlobalVarsN
)
//gosl: end globals