In [1]:
using Pkg
Pkg.activate(".")

"C:\\Users\\carsten\\Desktop\\JuliaWorkshop19\\2_Two\\3_machine_learning\\zygote\\"

# Machine Learning in Julia: Flux.jl

<img src="https://fluxml.ai/logo.png" width=800>

<img src="../flux.png" width=900>

Web page: https://fluxml.ai/

Examples: [Model zoo](https://github.com/FluxML/model-zoo/)

# A single neuron

In [5]:
using Flux

In [19]:
# Single sigmoid neuron: affine map of the input followed by an elementwise σ.
# `W` and `b` are globals and are looked up each time `model` is called.
function model(x)
    preactivation = W * x + b
    return σ.(preactivation)
end

model (generic function with 2 methods)

In [62]:
# single neuron: 5 inputs, 1 output
W = randn(1, 5) # weights: 1×5 matrix of standard-normal samples
b = zeros(1)    # biases: one-element vector, starts at zero
x = rand(5)     # input: 5 uniform random numbers in [0, 1)

5-element Array{Float64,1}:
 0.4256275822507929
 0.9702364215891386
 0.3290662609566366
 0.5213958926845632
 0.5929071691174677

In [63]:
# evaluate the neuron on the sample input `x`
model(x)

1-element Array{Float64,1}:
 0.2177697408997619

In [64]:
# Mean squared error between the neuron output and the constant target 0.5.
function loss(x)
    prediction = model(x)
    return Flux.mse(prediction, 0.5) # mean squared error
end

loss (generic function with 2 methods)

In [65]:
# loss of the untrained neuron on the sample input `x`
loss(x)

0.07965391915178754

In [66]:
# `gradient(f, args...)` differentiates the call `f(args...)`, so the function
# handed over must take W, b and x as explicit arguments. The one-argument
# `loss(x)` has no such method, hence the explicit closure computing the same
# quantity. The result is a tuple (∂W, ∂b, ∂x).
grad = gradient((W, b, x) -> Flux.mse(σ.(W * x + b), 0.5), W, b, x)

([-0.0409256 -0.0932918 … -0.0501341 -0.0570102], [-0.0961536], [0.0750345, 0.130382, -0.0681548, -0.0227079, -0.00205534])

We can now use these gradients to update our parameters.

In [44]:
# one step of plain gradient descent on the weights and biases

η = 0.1 # step size
# `zip` stops after (W, b), so any further entries of `grad` are ignored
for (p, g) in zip((W, b), grad)
  p .-= η .* g # in-place update, so the globals W and b themselves change
end

In [82]:
# repeat a couple of times

# `gradient(f, args...)` differentiates the call `f(args...)`, so W, b and x are
# passed through an explicit three-argument closure; the one-argument `loss(x)`
# has no method accepting (W, b, x).
grad = gradient((W, b, x) -> Flux.mse(σ.(W * x + b), 0.5), W, b, x)

η = 0.1 # step size
for (i, p) in enumerate((W, b))
  p .-= η * grad[i] # gradient descent
end

# print the loss after this step
@show loss(x);

loss(x) = 0.0408240348849211


Of course, Flux offers more sophisticated optimizers than this hand-written update, such as ADAM and other variants of [stochastic gradient descent](https://en.wikipedia.org/wiki/Stochastic_gradient_descent).

# Neural Network

Our full deep learning code:

In [108]:
# two dense layers (10 → 5 → 2) followed by a softmax
m = Chain(
    Dense(10, 5),
    Dense(5, 2),
    softmax, # normalize output neurons
)

# 10×100 random input matrix and a 2×100 target matrix filled with 0.5
data = rand(10, 100)
labels = fill(0.5, 2, 100)

opt = ADAM(0.01)

# mean squared error between the network output and the targets
function loss(x, y)
    prediction = m(x)
    return sum(Flux.mse(prediction, y))
end

# one call to train! over the single (data, labels) batch
Flux.train!(loss, params(m), [(data, labels)], opt)

In [109]:
using Statistics
# Mean per-sample loss before any *further* training. Note that the cell above
# has already called `Flux.train!` once, so this is not the loss of the freshly
# initialised network.
# Iterate over the actual column range of `data` instead of a hard-coded 1:100.
mean(loss(data[:, i], labels[:, i]) for i in axes(data, 2))

0.015583558094548623

In [120]:
# one more call to train! over the single (data, labels) batch
Flux.train!(loss, params(m), [(data, labels)], opt)

# loss after training: average of the per-sample losses over all 100 columns
per_sample_losses = (loss(data[:, i], labels[:, i]) for i in 1:100)
mean(per_sample_losses)

0.008413629815947827

In [126]:
using Flux, BenchmarkTools
using Flux: crossentropy, onecold, onehotbatch, throttle, @epochs
using Printf, Statistics, Random
using Base.Iterators: repeated

# Seed before the first random draw so that the synthetic data below is
# reproducible as well, not only what is drawn after it.
Random.seed!(123)

# two sets of 4000 random 64-element columns, standing in for two classes
confs_left = rand(64,4000)
confs_right = rand(64,4000)

# set up as training data
neach = size(confs_left, 2)
X = hcat(confs_left, confs_right)               # 64 × 2*neach inputs
labels = vcat(fill(1, neach), fill(0, neach))   # 1 for "left" columns, 0 for "right"
Y = onehotbatch(labels, 0:1)
dataset = repeated((X, Y), 10)                  # the same full batch, 10 times

# create neural network with 10 hidden units and 2 output neurons
m = Chain(
  Dense(64, 10, relu),
  Dense(10, 2),
  softmax)

# define cost-function
loss(x, y) = crossentropy(m(x), y)
# fraction of columns whose predicted class matches the label
accuracy(x, y) = mean(onecold(m(x)) .== onecold(y))

opt = ADAM()

println("-------- Training")
# NOTE: @btime evaluates the expression many times to collect timing samples,
# so `m` ends up trained for more than a single pass over `dataset`.
@btime Flux.train!($loss, params($m), $dataset, $opt)

-------- Training




  955.648 ms (28965221 allocations: 587.11 MiB)
