From 96d4ffb7cf2cc92aa9c30304869d8116d13c470d Mon Sep 17 00:00:00 2001 From: cangumeli Date: Tue, 13 Feb 2018 17:35:28 +0300 Subject: [PATCH 1/2] L2 Decay added to adam --- src/update.jl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/update.jl b/src/update.jl index 583f88da8..fe2e014c0 100644 --- a/src/update.jl +++ b/src/update.jl @@ -29,7 +29,7 @@ end const SGDLR = 0.001 -Sgd(; lr=SGDLR, gclip=0) = Sgd(lr,gclip) +Sgd(; lr=SGDLR, gclip=0) = Sgd(lr, gclip) """ @@ -237,7 +237,7 @@ Rmsprop(; lr=0.001, gclip=0, rho=0.9, eps=1e-6)=Rmsprop(lr, gclip, rho, eps, not """ - Adam(;lr=0.001, gclip=0, beta1=0.9, beta2=0.999, eps=1e-8) + Adam(;lr=0.001, gclip=0, beta1=0.9, beta2=0.999, eps=1e-8, l2decay=0) update!(w,g,p::Adam) Container for parameters of the Adam optimization algorithm used by @@ -280,9 +280,10 @@ type Adam t::Int fstm scndm + l2decay::AbstractFloat end -Adam(; lr=0.001, gclip=0, beta1=0.9, beta2=0.999, eps=1e-8)=Adam(lr, gclip, beta1, beta2, eps, 0, nothing, nothing) +Adam(; lr=0.001, gclip=0, beta1=0.9, beta2=0.999, eps=1e-8, l2decay=0)=Adam(lr, gclip, beta1, beta2, eps, 0, nothing, nothing, l2decay) """ @@ -377,6 +378,7 @@ for T in (Array{Float32},Array{Float64},KnetArray{Float32},KnetArray{Float64}); end function update!(w::$T, g::$T, p::Adam) + l2decay!(w, g, p) gclip!(g, p.gclip) if p.fstm===nothing; p.fstm=zeros(w); p.scndm=zeros(w); end p.t += 1 @@ -487,6 +489,12 @@ function gclip!(g, gclip) end end +function l2decay!(w, g, o) + o.l2decay == 0 && return g + axpy!(o.l2decay, w, g) +end + + """ optimizers(model, otype; options...) From 7cd74db6919ef77a86fb5a6649587d42411f1be2 Mon Sep 17 00:00:00 2001 From: cangumeli Date: Tue, 13 Feb 2018 17:39:13 +0300 Subject: [PATCH 2/2] Empty space removal --- src/update.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/update.jl b/src/update.jl index fe2e014c0..45468e82b 100644 --- a/src/update.jl +++ b/src/update.jl @@ -29,7 +29,7 @@ end const SGDLR = 0.001 -Sgd(; lr=SGDLR, gclip=0) = Sgd(lr, gclip) +Sgd(; lr=SGDLR, gclip=0) = Sgd(lr,gclip) """