In [1]:
using Flux


In [2]:
# Ground-truth affine map: y = W_truth * x .+ b_truth. Training only ever sees
# samples produced by ground_truth(); the goal is to recover W_truth and
# b_truth from those samples alone.
W_truth = [
    1 2 3 4 5
    5 4 3 2 1
]
b_truth = [-1.0, -2.0]
function ground_truth(x)
    return W_truth * x .+ b_truth
end


ground_truth (generic function with 1 method)

In [3]:
# Build 10_000 training pairs, stored as vectors-of-vectors. Each input is a
# 5-element vector of rand() samples scaled by 5; each target is
# ground_truth(input) plus randn() noise scaled by 0.2.
x_train = map(_ -> 5 .* rand(5), 1:10_000)
y_train = map(x -> ground_truth(x) .+ 0.2 .* randn(2), x_train)


10000-element Array{Array{Float64,1},1}:
 [39.61470621403595, 36.22959169851687]
 [28.842448719113847, 38.572599435845056]
 [44.80968384230517, 42.185497816984004]
 [18.11746012175172, 22.826898700765945]
 [37.34357597095279, 18.916770967113916]
 [27.687534755486656, 35.51214067327718]
 [50.52802675625742, 43.2374810740802]
 [54.23887708484592, 39.34279247372555]
 [46.77378201450665, 51.5727525403774]
 [27.71335142873436, 34.56992492861688]
 [26.300719059686635, 40.134461562510914]
 [42.17710742283444, 29.356967432931842]
 [50.97572802935713, 38.8974692172143]
 ⋮
 [54.45534562794151, 45.31003945786663]
 [42.26053729878432, 50.228027178796644]
 [19.403389701273927, 33.43017915554205]
 [31.419433853379164, 28.4018406306899]
 [18.594656072655635, 24.559108795551516]
 [40.69594747612888, 26.969366783539808]
 [60.527818811179685, 54.74429852828056]
 [33.70977247596811, 34.00922324134338]
 [41.51396493090932, 41.53055187235174]
 [42.739357247283955, 37.05740005879266]
 [35.19686442694343, 33

In [4]:
# The trainable model: an affine map with the same shape as ground_truth.
# W (2×5) and b (length 2) start from rand() values and are looked up as
# globals each time model is called.
function model(x)
    return W * x .+ b
end
W = rand(2, 5)
b = rand(2)


2-element Array{Float64,1}:
 0.08096345461855958
 0.246453838055549

In [5]:
# Training ingredients: the loss, the optimiser, the stream of examples, and
# the collection of trainable parameters.

# Sum of squared errors between the target y and the model's prediction for x.
function loss(x, y)
    residual = y .- model(x)
    return sum(residual .^ 2)
end
opt = Descent(0.01)
train_data = zip(x_train, y_train)  # pairs each input with its target
ps = params(W, b)


Params([[0.023013439119377255 0.41860686166169825 … 0.12137833153660904 0.31795034957306556; 0.14567448642356906 0.9757304093269719 … 0.7817161245532789 0.40551961018622285], [0.08096345461855958, 0.246453838055549]])

In [6]:
# Run one training epoch: a single pass over train_data, computing the
# gradient of the loss with respect to ps for each (x, y) example and applying
# one optimiser update per example.
for (x, y) in train_data
    grads = gradient(() -> loss(x, y), ps)
    Flux.Optimise.update!(opt, ps, grads)
end

# An alternate way to execute a training epoch
# Flux.train!(loss, params(W, b), train_data, opt)


In [7]:
# Print out how well we did. `@show` prints each expression together with its
# value: first the learned W, then the largest absolute elementwise difference
# between W and W_truth. (b is not compared against b_truth here.)
@show W
@show maximum(abs, W .- W_truth)

W = [1.0039272525417395 2.0183115180248015 2.941126976896121 4.030915179864688 4.98815849693528; 5.019364869074857 4.043524527306904 3.000079284736376 1.9797907937455017 1.0015700006582988]
maximum(abs, W .- W_truth) = 0.058873023103878985


0.058873023103878985