## [Model Building Basics](https://fluxml.ai/Flux.jl/stable/models/basics/)

In [9]:
using Flux, Flux.Tracker
using Flux.Tracker: update!

In [2]:
# Affine expression in its three arguments: scale `x` by `W`, then shift by `b`.
function f(W, b, x)
  return W * x + b
end

f (generic function with 1 method)

In [3]:
# Differentiate f at W = 2, b = 3, x = 4; the result holds one derivative
# per argument, in argument order.
Tracker.gradient(f, 2, 3, 4)

(4.0 (tracked), 1.0 (tracked), 2.0 (tracked))

### Simple Model

In [4]:
# Randomly initialised parameters: a 2×5 weight matrix and a 2-element bias.
W, b = rand(2, 5), rand(2)

# Affine prediction for input `x`, using the global parameters `W` and `b`.
function predict(x)
  return W * x .+ b
end

# Sum of squared differences between the target `y` and the prediction for `x`.
function loss(x, y)
  residual = y .- predict(x)
  return sum(residual .^ 2)
end

x, y = rand(5), rand(2) # Dummy data
loss(x, y) # ~ 3

2.57244859228637

#### Gradient Descent

In [5]:
# Rebind W and b to their tracked-parameter versions.
W, b = param(W), param(b)

# Gradients of the loss on the dummy data, taken over the parameters W and b.
gs = Tracker.gradient(params(W, b)) do
  loss(x, y)
end

Grads(...)


In [10]:
# Look up the gradient entry for W in gs
Δ = gs[W]

# Update the parameter and reset the gradient
# (the update passed in is -0.1Δ; only W is updated here, b is left as it is)
update!(W, -0.1Δ)

loss(x, y) # ~ 2.5 — NOTE(review): this run printed ≈ 1.99; the value depends on the random data

1.985806971316228 (tracked)

### Building Layers

In [11]:
# First layer: affine map from 5 inputs to 3 outputs.
W1, b1 = param(rand(3, 5)), param(rand(3))
function layer1(x)
  return W1 * x .+ b1
end

# Second layer: affine map from 3 inputs to 2 outputs.
W2, b2 = param(rand(2, 3)), param(rand(2))
function layer2(x)
  return W2 * x .+ b2
end

# Two-layer model: layer1, then element-wise σ, then layer2.
function model(x)
  hidden = σ.(layer1(x))
  return layer2(hidden)
end

model(rand(5)) # => 2-element vector

Tracked 2-element Array{Float64,1}:
 2.090124982499865 
 1.2199835234699687

#### Making a linear layer function

In [12]:
"""
    linear(in, out)

Return a closure `x -> W * x .+ b` with a freshly drawn `out`×`in` weight
matrix `W` and a length-`out` bias `b`, both wrapped with `param`.
"""
function linear(in, out)
  # The local names `W` and `b` are kept on purpose: callers reach the captured
  # parameters through the closure by these names (e.g. `linear1.W`).
  W, b = param(randn(out, in)), param(randn(out))
  return x -> W * x .+ b
end

# Build the two layers; each closure carries its own W and b.
linear1 = linear(5, 3) # we can access linear1.W etc
linear2 = linear(3, 2)

# Same shape of model as before: first layer, element-wise σ, second layer.
function model(x)
  hidden = σ.(linear1(x))
  return linear2(hidden)
end

model(rand(5)) # => 2-element vector

Tracked 2-element Array{Float64,1}:
 -0.02948623550207019
  1.1407046566680694 

#### Equivalent way with a struct

In [13]:
"""
    Affine(W, b)
    Affine(in::Integer, out::Integer)

Affine layer that computes `W * x .+ b` when called on an input `x`.

`Affine(W, b)` stores the given weights and bias as they are.
`Affine(in, out)` draws an `out`×`in` weight matrix and a length-`out` bias
with `randn` and wraps both with `param`.

The field types are type parameters rather than untyped (`Any`) fields, so the
concrete types of `W` and `b` are known from the type of the layer and calls on
it can be specialized.
"""
struct Affine{S,T}
  W::S
  b::T
end

Affine(in::Integer, out::Integer) =
  Affine(param(randn(out, in)), param(randn(out)))

# Overload call, so the object can be used as a function
(m::Affine)(x) = m.W * x .+ m.b

# Layer with 10 inputs and 5 outputs (W is 5×10, b has length 5)
a = Affine(10, 5)

a(rand(10)) # => 5-element vector

Tracked 5-element Array{Float64,1}:
  1.1323009942587032
 -3.570977309165567 
 -3.6661803526305787
  3.0756382201430936
 -3.524317813766377 