In [1]:
using Flux

## Custom Affine Layer

In [2]:
"""
    Affine(W, b)

Layer that stores a weight `W` and a bias `b`.

The field types are type parameters, so each instance carries concretely typed
fields. Untyped fields would be stored as `Any`, which prevents the compiler
from specialising code that reads them.
"""
struct Affine{M,V}
  W::M
  b::V
end

In [3]:
"""
    Affine(in::Integer, out::Integer)

Build an `Affine` layer from its dimensions: the weight is `randn(out, in)` and
the bias is `randn(out)`.
"""
function Affine(in::Integer, out::Integer)
  # Draw the weight first, then the bias, so the random stream is consumed in
  # the same order as the arguments are listed.
  weight = randn(out, in)
  bias = randn(out)
  return Affine(weight, bias)
end

Affine

In [4]:
# Make `Affine` instances callable: multiply the input by the weight, then add
# the bias with a broadcasting `.+`.
function (m::Affine)(x)
  return m.W * x .+ m.b
end

In [5]:
# Build a layer through the (in, out) constructor: 5 inputs, 1 output, so the
# weight is 1×5 and the bias has a single entry (see the printed value below).
a = Affine(5,1)

Affine([-1.3998924910546415 -0.8346975821558497 … -0.4713583615515866 0.5470560860447551], [2.8189807034404044])

In [6]:
# Apply the layer to a random 5×2 input; the result printed below is 1×2.
a(rand(5,2))

1×2 Matrix{Float64}:
 1.43119  1.253

In [7]:
# Restrict training to the weight: only `W` is reported as trainable, so the
# bias `b` is left out of parameter collection. The result is a NamedTuple keyed
# by the field name rather than a plain Tuple, which keeps the association
# between the array and the field it came from.
Flux.trainable(a::Affine) = (; W = a.W)

In [8]:
# Collect the parameters Flux will train for `a`; the output below lists only
# the weight matrix, not the bias.
ps = Flux.params(a)

Params([[-1.3998924910546415 -0.8346975821558497 … -0.4713583615515866 0.5470560860447551]])

In [9]:
# Synthetic regression data: 100 random input columns with 5 features each.
x = rand(5, 100)
# Targets come from a random 1×5 linear map applied to x (no bias term), giving
# the 1×100 matrix shown below.
y = rand(1,5)*x

1×100 Matrix{Float64}:
 1.16183  0.849308  1.00973  1.53476  …  1.12372  0.680683  0.859187

In [30]:
# Differentiate the mean-squared error between a(x) and y with respect to the
# parameters collected by Flux.params(a).
gs = Flux.gradient(Flux.params(a)) do
  Flux.mse(a(x), y)
end

Grads(...)

In [31]:
# Look up the gradient entry for the weight matrix; it is 1×5, like a.W.
gs[a.W]

1×5 Matrix{Float64}:
 0.636324  0.685968  0.904887  0.775993  0.963854

## Multi-Task Learning (MTL) Split Layers

In [36]:
using Fluxperimental

In [37]:
# A Dense(3 => 5) layer followed by a Split holding two Dense(5 => 1, tanh)
# branches; calling the model yields a tuple of two matrices (see the typeof
# output further down).
model = Chain(
    Dense(3 => 5),
    Split(
        Dense(5 => 1, tanh),
        Dense(5 => 1, tanh),
    ),
)

Chain(
  Dense(3 => 5),                        # 20 parameters
  Split(
    Tuple(
      Dense(5 => 1, tanh),              # 6 parameters
      Dense(5 => 1, tanh),              # 6 parameters
    ),
  ),
)                    # Total: 6 arrays, 32 parameters, 576 bytes.

In [40]:
# Draw a 3×4 input; the first dimension matches the first layer, Dense(3 => 5).
# NOTE(review): randn(3, 4) yields Float64 — confirm this matches the element
# type of the model's parameters, or consider randn(Float32, 3, 4).
xs = randn(3, 4)
# Run the model on that input.
ypred = model(xs)
# Inspect the type of the prediction: a 2-tuple of matrices per the output below.
typeof(ypred)

Tuple{Matrix{Float64}, Matrix{Float64}}