# Julia Deep Learning

https://fluxml.ai/

https://github.com/FluxML/model-zoo

In [1]:
using Flux
using CuArrays

## MNIST Dataset

In [2]:
MNISTspec = (
    input_size = (28, 28, 1),
    num_classes = 10,
    train_size = 60000,
    test_size = 10000,
)
batch_size = 256;

In [3]:
function load_mnist(split=:train)
    mnist = Flux.Data.MNIST
    
    images = mnist.images(split) # Array with N images of 28x28 8-bits gray
    labels = mnist.labels(split) # Array with N labels scalar 0-9
    
    return images, labels
end

X, y = load_mnist()

println("X: type=$(typeof(X)), size=$(size(X))")
println("y: type=$(typeof(y)), size=$(size(y))")

X: type=Array{Array{ColorTypes.Gray{FixedPointNumbers.Normed{UInt8,8}},2},1}, size=(60000,)
y: type=Array{Int64,1}, size=(60000,)


In [4]:
import Flux: onehot
import Base.Iterators: partition

function make_batch(X, y, batch_size)
    num_examples = length(X)
    num_batches = ceil(Int, num_examples / batch_size)
    
    batches = Array{Tuple{Array{Float32,4},Array{Float32,2}},1}(undef, num_batches)
    batch_indices = partition(1:num_examples, batch_size)
    for (i, indices) in enumerate(batch_indices)
        n = length(indices)
        X_i = Array{Float32,4}(undef, MNISTspec.input_size..., n)
        y_i = Array{Float32,2}(undef, MNISTspec.num_classes, n)
        for (j, k) in zip(1:n, indices)
            X_i[:, :, :, j] = Float32.(reshape(X[k], MNISTspec.input_size...))
            y_i[:, j] = Float32.(onehot(y[k], 0:MNISTspec.num_classes-1))
        end
        batches[i] = (X_i, y_i)
    end
    
    return batches
end

train_data = make_batch(X, y, batch_size)

println("Train Data: type=$(typeof(train_data)), size=$(size(train_data))")

Train Data: type=Array{Tuple{Array{Float32,4},Array{Float32,2}},1}, size=(235,)


In [5]:
size(train_data[1][1])

(28, 28, 1, 256)

In [6]:
size(train_data[end][1])

(28, 28, 1, 96)

In [7]:
size(train_data[1][2])

(10, 256)

In [8]:
size(train_data[end][2])

(10, 96)

In [9]:
test_data = make_batch(load_mnist(:test)..., MNISTspec.test_size)[1]

println("Test Data: type=$(typeof(test_data))")
println(size(test_data[1]))
println(size(test_data[2]))

Test Data: type=Tuple{Array{Float32,4},Array{Float32,2}}
(28, 28, 1, 10000)
(10, 10000)


## Keras-like API

In [10]:
struct Flatten end

(::Flatten)(x) = reshape(x, :, size(x)[end])

struct Reshape
    dims
end

(layer::Reshape)(x) = reshape(x, layer.dims..., size(x)[end])

## MLP

https://github.com/FluxML/model-zoo/blob/master/vision/mnist/mlp.jl

In [11]:
input_size = MNISTspec.input_size
output_size = MNISTspec.num_classes
hidden_size = 500

model = Chain(
    Flatten(),
    Dense(prod(input_size), hidden_size, relu),
    Dense(hidden_size, output_size),
    softmax,
)

model = gpu(model)

model

Chain(Flatten(), Dense(784, 500, NNlib.relu), Dense(500, 10), NNlib.softmax)

In [12]:
let
    x1 = gpu(train_data[1][1])
    display(model(x1))
end

Tracked 10×256 CuArray{Float32,2}:
 0.114505   0.0771493  0.102166   …  0.0848796  0.132435   0.0846112
 0.0873715  0.122454   0.115082      0.0792973  0.0578625  0.101084 
 0.135072   0.12598    0.0456826     0.118104   0.111455   0.123017 
 0.083376   0.0428141  0.063969      0.0640264  0.0601236  0.0564281
 0.169315   0.140546   0.203719      0.135257   0.16321    0.179357 
 0.046649   0.0471217  0.0864572  …  0.0498085  0.0503107  0.0419649
 0.0535477  0.0722689  0.0944119     0.0648531  0.0774565  0.0723548
 0.102301   0.111181   0.0745297     0.130334   0.118817   0.0964773
 0.0693899  0.0956539  0.124982      0.144593   0.0977641  0.0877099
 0.138474   0.164832   0.0890001     0.128847   0.130565   0.156996 

In [13]:
import Flux: logitcrossentropy, onecold
import Statistics: mean

function loss(x, y)
    logits = model[1:end-1](x)
    return logitcrossentropy(logits, y)
end

function accuracy(x, y)
    y_hat = model(x)
    return mean(onecold(y_hat) .== onecold(y))
end


let
    println("Train")
    
    data1 = gpu(train_data[1])
    loss_ = loss(data1...)
    acc_ = accuracy(data1...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

println()

let
    println("Test")

    data = gpu(test_data)
    loss_ = loss(data...)
    acc_ = accuracy(data...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

Train
Loss: 2.3745413f0 (tracked)
Accuracy: 0.11328125

Test
Loss: 2.38708f0 (tracked)
Accuracy: 0.1152


In [14]:
import Flux: train!
import BSON: bson
import Printf: @printf

optimizer = ADAM(0.001)

best_acc = 0.0
last_improvement = 0
for epoch_idx in 1:100
    # Train for a single epoch
    for i in 1:length(train_data)
        let data = gpu(train_data[i])
            train!(loss, params(model), [data], optimizer)
        end
    end
    
    # Calculate accuracy:
    acc = let data = gpu(test_data)
        accuracy(data...)
    end
    
    @printf("[%d] Test accuracy: %.4f\n", epoch_idx, acc)

    # If our accuracy is good enough, quit out.
    if acc >= 0.999
        println(" -> Early-exiting: We reached our target accuracy of 99.9%")
        break
    end

    # If this is the best accuracy we've seen so far, save the model out
    if acc >= best_acc
        println(" -> New best accuracy! Saving model out to mlp_mnist.bson")
        bson("mlp_mnist.bson", model = cpu(model), epoch = epoch_idx)
        best_acc = acc
        last_improvement = epoch_idx
    end

    # If we haven't seen improvement in 5 epochs, drop our learning rate:
    if epoch_idx - last_improvement >= 5 && optimizer.eta > 1e-6
        optimizer.eta /= 10.0
        println(" -> Haven't improved in a while, dropping learning rate to $(optimizer.eta)!")

        # After dropping learning rate, give it a few epochs to improve
        last_improvement = epoch_idx
    end

    if epoch_idx - last_improvement >= 10
        println(" -> We're calling this converged.")
        break
    end
end

[1] Test accuracy: 0.9508
 -> New best accuracy! Saving model out to mlp_mnist.bson
[2] Test accuracy: 0.9654
 -> New best accuracy! Saving model out to mlp_mnist.bson
[3] Test accuracy: 0.9698
 -> New best accuracy! Saving model out to mlp_mnist.bson
[4] Test accuracy: 0.9719
 -> New best accuracy! Saving model out to mlp_mnist.bson
[5] Test accuracy: 0.9728
 -> New best accuracy! Saving model out to mlp_mnist.bson
[6] Test accuracy: 0.9747
 -> New best accuracy! Saving model out to mlp_mnist.bson
[7] Test accuracy: 0.9766
 -> New best accuracy! Saving model out to mlp_mnist.bson
[8] Test accuracy: 0.9767
 -> New best accuracy! Saving model out to mlp_mnist.bson
[9] Test accuracy: 0.9772
 -> New best accuracy! Saving model out to mlp_mnist.bson
[10] Test accuracy: 0.9775
 -> New best accuracy! Saving model out to mlp_mnist.bson
[11] Test accuracy: 0.9769
[12] Test accuracy: 0.9770
[13] Test accuracy: 0.9783
 -> New best accuracy! Saving model out to mlp_mnist.bson
[14] Test accuracy: 

In [15]:
model = nothing

GC.gc()

In [16]:
import BSON: load

model = load("mlp_mnist.bson")[:model]

model

Chain(Flatten(), Dense(784, 500, NNlib.relu), Dense(500, 10), NNlib.softmax)

In [17]:
let
    println("Test")

    loss_ = loss(test_data...)
    acc_ = accuracy(test_data...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

Test
Loss: 0.07279701f0 (tracked)
Accuracy: 0.9831


## CNN

https://github.com/FluxML/model-zoo/blob/master/vision/mnist/conv.jl

In [18]:
input_size = MNISTspec.input_size
output_size = MNISTspec.num_classes

conv1_kernel_size = (3, 3)
conv1_filter_size = input_size[end] => 16
conv1_pad = (1, 1)
pool1_size = (2, 2)

conv2_kernel_size = (3, 3)
conv2_filter_size = conv1_filter_size.second => 32
conv2_pad = (1, 1)
pool2_size = (2, 2)

conv3_kernel_size = (3, 3)
conv3_filter_size = conv2_filter_size.second => 32
conv3_pad = (1, 1)
pool3_size = (2, 2)

fc1_size = prod(input_size[1:2] .÷ pool1_size .÷ pool2_size .÷ pool3_size) * conv3_filter_size.second

model = Chain(
    Conv(conv1_kernel_size, conv1_filter_size, pad=conv1_pad, relu),
    MaxPool(pool1_size),

    Conv(conv2_kernel_size, conv2_filter_size, pad=conv2_pad, relu),
    MaxPool(pool2_size),

    Conv(conv3_kernel_size, conv3_filter_size, pad=conv3_pad, relu),
    MaxPool(pool3_size),
    
    Flatten(),
    Dense(fc1_size, output_size),
    softmax,
)

model = gpu(model)

model

Chain(Conv((3, 3), 1=>16, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 16=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Flatten(), Dense(288, 10), NNlib.softmax)

In [19]:
let
    x1 = gpu(train_data[1][1])
    display(model(x1))
end

Tracked 10×256 CuArray{Float32,2}:
 0.0268965   0.0520115   0.0376512   …  0.0256713  0.0193849   0.0507047 
 0.0204527   0.02113     0.0304092      0.0413319  0.0450546   0.0258311 
 0.0108412   0.00711891  0.0945252      0.0290647  0.0125918   0.0109416 
 0.015458    0.0148641   0.00753938     0.0163237  0.0243562   0.0139373 
 0.00486294  0.0224071   0.0165965      0.0144639  0.00979678  0.00589586
 0.306127    0.386864    0.375069    …  0.618564   0.387693    0.198621  
 0.0432158   0.104996    0.115574       0.0365944  0.0212408   0.0485221 
 0.166338    0.202689    0.191758       0.0670858  0.124566    0.133041  
 0.260729    0.147857    0.106605       0.111268   0.342465    0.379554  
 0.145079    0.0400625   0.0242734      0.0396327  0.0128503   0.132951  

In [20]:
import Flux: crossentropy, onecold
import Statistics: mean

function loss_noise(x, y)
    x_aug = x .+ 0.1f0 * gpu(randn(eltype(x), size(x)))
    logits = model[1:end-1](x)
    return logitcrossentropy(logits, y)
end

function accuracy(x, y)
    y_hat = model(x)
    return mean(onecold(y_hat) .== onecold(y))
end


let
    println("Train")
    
    data1 = gpu(train_data[1])
    loss_ = loss(data1...)
    acc_ = accuracy(data1...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

Train
Loss: 3.163166f0 (tracked)
Accuracy: 0.07421875


In [21]:
import Flux: train!
import BSON: bson
import Printf: @printf

optimizer = ADAM(0.001)

best_acc = 0.0
last_improvement = 0
for epoch_idx in 1:100
    # Train for a single epoch
    for i in 1:length(train_data)
        let data = gpu(train_data[i])
            train!(loss_noise, params(model), [data], optimizer)
        end
    end
    
    # Calculate accuracy:
    acc = let data = gpu(test_data)
        accuracy(data...)
    end
    
    @printf("[%d] Test accuracy: %.4f\n", epoch_idx, acc)
    
    # If our accuracy is good enough, quit out.
    if acc >= 0.999
        println(" -> Early-exiting: We reached our target accuracy of 99.9%")
        break
    end

    # If this is the best accuracy we've seen so far, save the model out
    if acc >= best_acc
        println(" -> New best accuracy! Saving model out to cnn_mnist.bson")
        bson("cnn_mnist.bson", model = cpu(model), epoch = epoch_idx)
        best_acc = acc
        last_improvement = epoch_idx
    end

    # If we haven't seen improvement in 5 epochs, drop our learning rate:
    if epoch_idx - last_improvement >= 5 && optimizer.eta > 1e-6
        optimizer.eta /= 10.0
        println(" -> Haven't improved in a while, dropping learning rate to $(optimizer.eta)!")

        # After dropping learning rate, give it a few epochs to improve
        last_improvement = epoch_idx
    end

    if epoch_idx - last_improvement >= 10
        println(" -> We're calling this converged.")
        break
    end
end

[1] Test accuracy: 0.9495
 -> New best accuracy! Saving model out to cnn_mnist.bson
[2] Test accuracy: 0.9694
 -> New best accuracy! Saving model out to cnn_mnist.bson
[3] Test accuracy: 0.9769
 -> New best accuracy! Saving model out to cnn_mnist.bson
[4] Test accuracy: 0.9801
 -> New best accuracy! Saving model out to cnn_mnist.bson
[5] Test accuracy: 0.9827
 -> New best accuracy! Saving model out to cnn_mnist.bson
[6] Test accuracy: 0.9838
 -> New best accuracy! Saving model out to cnn_mnist.bson
[7] Test accuracy: 0.9850
 -> New best accuracy! Saving model out to cnn_mnist.bson
[8] Test accuracy: 0.9863
 -> New best accuracy! Saving model out to cnn_mnist.bson
[9] Test accuracy: 0.9864
 -> New best accuracy! Saving model out to cnn_mnist.bson
[10] Test accuracy: 0.9868
 -> New best accuracy! Saving model out to cnn_mnist.bson
[11] Test accuracy: 0.9874
 -> New best accuracy! Saving model out to cnn_mnist.bson
[12] Test accuracy: 0.9871
[13] Test accuracy: 0.9870
[14] Test accuracy: 

In [22]:
model = nothing

GC.gc()

In [23]:
import BSON: load

model = load("cnn_mnist.bson")[:model]

model

Chain(Conv((3, 3), 1=>16, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 16=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Flatten(), Dense(288, 10), NNlib.softmax)

In [24]:
let
    println("Test")

    loss_ = loss(test_data...)
    acc_ = accuracy(test_data...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

Test
Loss: 0.02778018f0 (tracked)
Accuracy: 0.9913


## Autoencoder

https://github.com/FluxML/model-zoo/blob/master/vision/mnist/autoencoder.jl

In [25]:
input_size = MNISTspec.input_size
hidden_size = 500
encoded_size = 50

encoder = Chain(
    Flatten(),
    Dense(prod(input_size), hidden_size, leakyrelu),
    Dense(hidden_size, encoded_size, leakyrelu),
)

decoder = Chain(
    Dense(encoded_size, hidden_size, leakyrelu),
    Dense(hidden_size, prod(input_size), leakyrelu),
    Reshape(input_size),
)

model = Chain(encoder, decoder)

model = gpu(model)

model

Chain(Chain(Flatten(), Dense(784, 500, NNlib.leakyrelu), Dense(500, 50, NNlib.leakyrelu)), Chain(Dense(50, 500, NNlib.leakyrelu), Dense(500, 784, NNlib.leakyrelu), Reshape((28, 28, 1))))

In [26]:
let
    x1 = gpu(train_data[1][1])
    x1_hat = model(x1)
    println("x    : type=$(typeof(x1)), size=$(size(x1))")
    println("x_hat: type=$(typeof(x1_hat)), size=$(size(x1_hat))")
end

x    : type=CuArray{Float32,4}, size=(28, 28, 1, 256)
x_hat: type=TrackedArray{…,CuArray{Float32,4}}, size=(28, 28, 1, 256)


In [27]:
function loss(x)
    x_hat = model(x)
    err = x_hat .- x
    N = length(x)
    return sum(err .* err) / N
end

let
    println("Train")
    x1 = gpu(train_data[1][1])
    loss_ = loss(x1)
    println("Loss: ", loss_)
end

let
    println("Test")
    data = gpu(test_data[1])
    loss_ = loss(data)
    println("Loss: ", loss_)
end

Train
Loss: 0.104881465f0 (tracked)
Test
Loss: 0.1089153f0 (tracked)


In [28]:
import Flux: train!
import BSON: bson
import Printf: @printf

optimizer = ADAM(0.001)

best_loss = Inf
last_improvement = 0
for epoch_idx in 1:100
    # Train for a single epoch
    for i in 1:length(train_data)
        let data = gpu(train_data[i][1])
            train!(loss, params(model), [(data,)], optimizer)
        end
    end
    
    # Calculate accuracy:
    loss_ = let data = gpu(test_data[1])
        loss(data)
    end
    
    @printf("[%d] Test loss: %.4f\n", epoch_idx, loss_)
    
    # If this is the best accuracy we've seen so far, save the model out
    if loss_ <= best_loss
        println(" -> New best loss! Saving model out to ae_mnist.bson")
        bson("ae_mnist.bson", model = cpu(model), epoch = epoch_idx)
        best_loss = loss_
        last_improvement = epoch_idx
    end

    # If we haven't seen improvement in 5 epochs, drop our learning rate:
    if epoch_idx - last_improvement >= 5 && optimizer.eta > 1e-6
        optimizer.eta /= 10.0
        println(" -> Haven't improved in a while, dropping learning rate to $(optimizer.eta)!")

        # After dropping learning rate, give it a few epochs to improve
        last_improvement = epoch_idx
    end

    if epoch_idx - last_improvement >= 10
        println(" -> We're calling this converged.")
        break
    end
end

[1] Test loss: 0.0121
 -> New best loss! Saving model out to ae_mnist.bson
[2] Test loss: 0.0096
 -> New best loss! Saving model out to ae_mnist.bson
[3] Test loss: 0.0087
 -> New best loss! Saving model out to ae_mnist.bson
[4] Test loss: 0.0082
 -> New best loss! Saving model out to ae_mnist.bson
[5] Test loss: 0.0078
 -> New best loss! Saving model out to ae_mnist.bson
[6] Test loss: 0.0072
 -> New best loss! Saving model out to ae_mnist.bson
[7] Test loss: 0.0068
 -> New best loss! Saving model out to ae_mnist.bson
[8] Test loss: 0.0066
 -> New best loss! Saving model out to ae_mnist.bson
[9] Test loss: 0.0063
 -> New best loss! Saving model out to ae_mnist.bson
[10] Test loss: 0.0061
 -> New best loss! Saving model out to ae_mnist.bson
[11] Test loss: 0.0060
 -> New best loss! Saving model out to ae_mnist.bson
[12] Test loss: 0.0058
 -> New best loss! Saving model out to ae_mnist.bson
[13] Test loss: 0.0058
 -> New best loss! Saving model out to ae_mnist.bson
[14] Test loss: 0.005

In [29]:
model = nothing

GC.gc()

In [30]:
import BSON: load

model = load("ae_mnist.bson")[:model]

model

Chain(Chain(Flatten(), Dense(784, 500, NNlib.leakyrelu), Dense(500, 50, NNlib.leakyrelu)), Chain(Dense(50, 500, NNlib.leakyrelu), Dense(500, 784, NNlib.leakyrelu), Reshape((28, 28, 1))))

In [31]:
let
    println("Test")
    loss_ = loss(test_data[1])
    println("Loss: ", loss_)
end

Test
Loss: 0.0036634277f0 (tracked)
