A quick experiment with the `back!` function in Flux: does it overwrite or accumulate gradients?

In [1]:
using Flux, Flux.Tracker

# Tracked weight vector whose gradient is inspected below.
W = param([1.0, 1.0])

# Prediction: the weights scaled by the input `x`. Reads the global `W` at call
# time, so rebinding `W` later changes what this function uses.
function predict(x)
    return W * x
end

# Sum of squared differences between the prediction for `x` and the target `y`.
function loss(x, y)
    return sum((predict(x) .- y) .^ 2)
end

# Experiment: call back! several times on losses built from the same tracked W
# and print W.grad after each call, to see whether the stored gradient is
# overwritten or accumulated. For this loss the per-call gradient with respect
# to each weight is 2 * (W * x - y) * x.
x = 1
y = 0

l = loss(x, y)
back!(l)
println(W.grad) # this call alone contributes [2, 2]; printed: [2.0, 2.0]

x = 2
l = loss(x, y)
back!(l)        # this call alone contributes [8, 8]
println(W.grad) # printed: [10.0, 10.0] = [2, 2] + [8, 8], so back! adds to the stored gradient

x = 1
l = loss(x, y)
back!(l)        # this call alone contributes [2, 2]
println(W.grad) # printed: [12.0, 12.0] = [10, 10] + [2, 2], confirming the accumulation

# Rebind W to a fresh param; the printed gradient then holds only the
# contribution of the single back! call that follows.
W = param([1., 1.])
l = loss(x, y)
back!(l)
println(W.grad) # printed: [2.0, 2.0]


[2.0, 2.0]
[10.0, 10.0]
[12.0, 12.0]
[2.0, 2.0]


In [2]:
# Start again from fresh tracked weights.
W = param([1.0, 1.0])

# Mutable holder for a scalar loss parameter, so the value can be changed
# in place between calls to `loss`.
mutable struct Lparam
    p::Float64
end

lossParam = Lparam(2.0)

# Parametrised loss: the sum of squared errors scaled by `lossParam.p`.
# The field is read on every call, so later mutation of `lossParam` takes effect.
function loss(x, y)
    squared_error = sum((predict(x) .- y) .^ 2)
    return lossParam.p * squared_error
end

# Experiment: change the multiplicative loss parameter between two back! calls.
# x and y are not assigned in this cell; the expected values in the comments
# assume the earlier cell has been run, leaving x = 1 and y = 0.
l = loss(x, y)
back!(l)
println(W.grad)  # with p = 2 this call contributes [4, 4]; printed: [4.0, 4.0]

lossParam.p = 1. # mutate the parameter in place; loss reads lossParam.p on each call

l = loss(x, y)
back!(l)
println(W.grad) # with p = 1 this call contributes [2, 2]; printed: [6.0, 6.0] = [4, 4] + [2, 2]


[4.0, 4.0]
[6.0, 6.0]


In [3]:
# Fresh tracked weights and a fresh parameter holder for this cell.
W = param([1.0, 1.0])
lossParam = Lparam(1.0)

# Parametrised loss: `lossParam.p` is the exponent applied elementwise to the error.
# The field is read on every call, so later mutation of `lossParam` takes effect.
function loss(x, y)
    return sum((predict(x) .- y) .^ lossParam.p)
end

# Experiment: change the exponent of the loss between two back! calls.
# x and y are not assigned in this cell; the expected values in the comments
# assume the earlier cell has been run, leaving x = 1 and y = 0.
l = loss(x, y)
back!(l)
println(W.grad) # with p = 1 this call contributes [1, 1]; printed: [1.0, 1.0]

lossParam.p = 2. # mutate the exponent in place; loss reads lossParam.p on each call

l = loss(x, y)
back!(l)
println(W.grad) # with p = 2 this call contributes [2, 2]; printed: [3.0, 3.0] = [1, 1] + [2, 2]

[1.0, 1.0]
[3.0, 3.0]
