/
accumulate_gradients_dagnn.m
42 lines (33 loc) · 1.69 KB
/
accumulate_gradients_dagnn.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
function state = accumulate_gradients_dagnn(state, net, opts, batchSize, mmap)
% ACCUMULATE_GRADIENTS_DAGNN  Apply accumulated derivatives to DagNN parameters.
%   STATE = ACCUMULATE_GRADIENTS_DAGNN(STATE, NET, OPTS, BATCHSIZE, MMAP)
%   updates NET.params(p).value for every parameter whose derivative
%   (NET.params(p).der) is non-empty, dispatching on trainMethod:
%     'average'  - exponential running average (mainly batch-norm moments)
%     'gradient' - SGD with momentum and weight decay
%   STATE.momentum{p} is updated in place for 'gradient' parameters.
%
%   MMAP is unused in this function; it is kept in the signature for
%   interface compatibility with callers (e.g. multi-GPU training loops).
%   NOTE(review): updates to NET.params are assumed visible to the caller
%   because dagnn.DagNN is a handle class -- confirm against caller.
% -------------------------------------------------------------------------
for p=1:numel(net.params)
  parDer = net.params(p).der ;
  if ~isempty(net.params(p).der)
    switch net.params(p).trainMethod
      case 'average' % mainly for batch normalization
        thisLR = net.params(p).learningRate ;
        % Running average: blend old value with the batch estimate,
        % normalized by batch size and the parameter's fan-out.
        net.params(p).value = ...
            (1 - thisLR) * net.params(p).value + ...
            (thisLR/batchSize/net.params(p).fanout) * net.params(p).der ;
      case 'gradient'
        thisDecay = opts.weightDecay * net.params(p).weightDecay ;
        thisLR = opts.learningRate * net.params(p).learningRate ;
        % Normalize gradient by batch size and incorporate weight decay
        % (decay enters as thisDecay * value added to the gradient).
        parDer = vl_taccum(1/batchSize, parDer, ...
                           thisDecay, net.params(p).value) ;
        % Update momentum: m <- mu * m - parDer.
        state.momentum{p} = vl_taccum(...
          opts.momentum, state.momentum{p}, ...
          -1, parDer) ;
        % Plain momentum step: the update is the momentum itself.
        % (Not a Nesterov look-ahead, despite what an earlier comment said.)
        delta = state.momentum{p} ;
        % Update parameters: value <- value + thisLR * delta.
        net.params(p).value = vl_taccum(...
          1, net.params(p).value, thisLR, delta) ;
      otherwise
        % BUG FIX: this was "case 'otherwise'", which matched only the
        % literal string 'otherwise' -- unknown training methods were
        % silently ignored instead of raising this error.
        error('Unknown training method ''%s'' for parameter ''%s''.', ...
          net.params(p).trainMethod, ...
          net.params(p).name) ;
    end
  end
end