# Julia Deep Learning

https://fluxml.ai/

https://github.com/FluxML/model-zoo

In [1]:
using Flux
using CuArrays

CuArrays.allowscalar(false)

## MNIST Dataset

In [2]:
MNISTspec = (
    input_size = (28, 28, 1),
    num_classes = 10,
    train_size = 60000,
    test_size = 10000,
)
batch_size = 256;

In [3]:
function load_mnist(split=:train)
    mnist = Flux.Data.MNIST
    
    images = mnist.images(split) # Array with N images of 28x28 8-bits gray
    labels = mnist.labels(split) # Array with N labels scalar 0-9
    
    return images, labels
end

X, y = load_mnist()

println("X: type=$(typeof(X)), size=$(size(X))")
println("y: type=$(typeof(y)), size=$(size(y))")

X: type=Array{Array{ColorTypes.Gray{FixedPointNumbers.Normed{UInt8,8}},2},1}, size=(60000,)
y: type=Array{Int64,1}, size=(60000,)


In [4]:
import Flux: onehot
import Base.Iterators: partition

function make_batch(X, y, batch_size)
    num_examples = length(X)
    num_batches = ceil(Int, num_examples / batch_size)
    
    batches = Array{Tuple{Array{Float32,4},Array{Float32,2}},1}(undef, num_batches)
    batch_indices = partition(1:num_examples, batch_size)
    for (i, indices) in enumerate(batch_indices)
        n = length(indices)
        X_i = Array{Float32,4}(undef, MNISTspec.input_size..., n)
        y_i = Array{Float32,2}(undef, MNISTspec.num_classes, n)
        for (j, k) in zip(1:n, indices)
            X_i[:, :, :, j] = Float32.(reshape(X[k], MNISTspec.input_size...))
            y_i[:, j] = Float32.(onehot(y[k], 0:MNISTspec.num_classes-1))
        end
        batches[i] = (X_i, y_i)
    end
    
    return batches
end

train_data = make_batch(X, y, batch_size)

println("Train Data: type=$(typeof(train_data)), size=$(size(train_data))")

Train Data: type=Array{Tuple{Array{Float32,4},Array{Float32,2}},1}, size=(235,)


In [5]:
size(train_data[1][1])

(28, 28, 1, 256)

In [6]:
size(train_data[end][1])

(28, 28, 1, 96)

In [7]:
size(train_data[1][2])

(10, 256)

In [8]:
size(train_data[end][2])

(10, 96)

In [9]:
test_data = make_batch(load_mnist(:test)..., MNISTspec.test_size)[1]

println("Test Data: type=$(typeof(test_data))")
println(size(test_data[1]))
println(size(test_data[2]))

Test Data: type=Tuple{Array{Float32,4},Array{Float32,2}}
(28, 28, 1, 10000)
(10, 10000)


## Keras-like API

In [10]:
struct Flatten end

(::Flatten)(x) = reshape(x, :, size(x)[end])

struct Reshape
    dims
end

(layer::Reshape)(x) = reshape(x, layer.dims..., size(x)[end])

## MLP

https://github.com/FluxML/model-zoo/blob/master/vision/mnist/mlp.jl

In [11]:
input_size = MNISTspec.input_size
output_size = MNISTspec.num_classes
hidden_size = 500

model = Chain(
    Flatten(),
    Dense(prod(input_size), hidden_size, relu),
    Dense(hidden_size, output_size),
    softmax,
)

model = gpu(model)

model

Chain(Flatten(), Dense(784, 500, relu), Dense(500, 10), softmax)

In [12]:
let
    x1 = gpu(train_data[1][1])
    display(model(x1))
end

10×256 CuArray{Float32,2,Nothing}:
 0.093467   0.129367   0.109898   …  0.0840953  0.0825193  0.08807  
 0.0870419  0.0858421  0.073987      0.0750366  0.166232   0.0609079
 0.0742691  0.0793655  0.102899      0.0530104  0.0719442  0.0770067
 0.0900969  0.113408   0.145694      0.0946832  0.115897   0.0683134
 0.146805   0.135063   0.0947556     0.196797   0.0762894  0.194225 
 0.0595226  0.0751179  0.110293   …  0.0928439  0.0936156  0.0587865
 0.0691995  0.0749764  0.0734426     0.053724   0.0561824  0.0380275
 0.174795   0.156614   0.105358      0.190739   0.13772    0.2101   
 0.105024   0.0980081  0.0809184     0.0994558  0.100085   0.0939066
 0.0997791  0.0522368  0.102756      0.0596143  0.099516   0.110657 

In [13]:
import Flux: logitcrossentropy, onecold
import Statistics: mean

function loss(x, y)
    logits = model[1:end-1](x)
    return logitcrossentropy(logits, y)
end

function accuracy(x, y)
    ŷ = model(x)
    return mean(onecold(cpu(ŷ)) .== onecold(cpu(y)))
end


let
    println("Train")
    
    data1 = gpu(train_data[1])
    loss_ = loss(data1...)
    acc_ = accuracy(data1...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

println()

let
    println("Test")

    data = gpu(test_data)
    loss_ = loss(data...)
    acc_ = accuracy(data...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

Train
Loss: 2.3362398
Accuracy: 0.1015625

Test
Loss: 2.3304803
Accuracy: 0.1184


In [14]:
import Flux: train!
import BSON: bson
import Printf: @printf

optimizer = ADAM(0.001)

best_acc = 0.0
last_improvement = 0
for epoch_idx in 1:100
    # Train for a single epoch
    for i in 1:length(train_data)
        let data = gpu(train_data[i])
            train!(loss, params(model), [data], optimizer)
        end
    end
    
    # Calculate accuracy:
    acc = let data = gpu(test_data)
        accuracy(data...)
    end
    
    @printf("[%d] Test accuracy: %.4f\n", epoch_idx, acc)

    # If our accuracy is good enough, quit out.
    if acc >= 0.999
        println(" -> Early-exiting: We reached our target accuracy of 99.9%")
        break
    end

    # If this is the best accuracy we've seen so far, save the model out
    if acc >= best_acc
        println(" -> New best accuracy! Saving model out to mlp_mnist.bson")
        bson("mlp_mnist.bson", model = cpu(model), epoch = epoch_idx)
        best_acc = acc
        last_improvement = epoch_idx
    end

    # If we haven't seen improvement in 5 epochs, drop our learning rate:
    if epoch_idx - last_improvement >= 5 && optimizer.eta > 1e-6
        optimizer.eta /= 10.0
        println(" -> Haven't improved in a while, dropping learning rate to $(optimizer.eta)!")

        # After dropping learning rate, give it a few epochs to improve
        last_improvement = epoch_idx
    end

    if epoch_idx - last_improvement >= 10
        println(" -> We're calling this converged.")
        break
    end
end

[1] Test accuracy: 0.9496
 -> New best accuracy! Saving model out to mlp_mnist.bson
[2] Test accuracy: 0.9640
 -> New best accuracy! Saving model out to mlp_mnist.bson
[3] Test accuracy: 0.9700
 -> New best accuracy! Saving model out to mlp_mnist.bson
[4] Test accuracy: 0.9729
 -> New best accuracy! Saving model out to mlp_mnist.bson
[5] Test accuracy: 0.9749
 -> New best accuracy! Saving model out to mlp_mnist.bson
[6] Test accuracy: 0.9746
[7] Test accuracy: 0.9757
 -> New best accuracy! Saving model out to mlp_mnist.bson
[8] Test accuracy: 0.9764
 -> New best accuracy! Saving model out to mlp_mnist.bson
[9] Test accuracy: 0.9765
 -> New best accuracy! Saving model out to mlp_mnist.bson
[10] Test accuracy: 0.9767
 -> New best accuracy! Saving model out to mlp_mnist.bson
[11] Test accuracy: 0.9769
 -> New best accuracy! Saving model out to mlp_mnist.bson
[12] Test accuracy: 0.9776
 -> New best accuracy! Saving model out to mlp_mnist.bson
[13] Test accuracy: 0.9788
 -> New best accurac

In [15]:
model = nothing

GC.gc()

In [16]:
import BSON: load

model = load("mlp_mnist.bson")[:model]

model

Chain(Flatten(), Dense(784, 500, relu), Dense(500, 10), softmax)

In [17]:
let
    println("Test")

    loss_ = loss(test_data...)
    acc_ = accuracy(test_data...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

Test
Loss: 0.07892211
Accuracy: 0.9838


## CNN

https://github.com/FluxML/model-zoo/blob/master/vision/mnist/conv.jl

In [18]:
input_size = MNISTspec.input_size
output_size = MNISTspec.num_classes

conv1_kernel_size = (3, 3)
conv1_filter_size = input_size[end] => 16
conv1_pad = (1, 1)
pool1_size = (2, 2)

conv2_kernel_size = (3, 3)
conv2_filter_size = conv1_filter_size.second => 32
conv2_pad = (1, 1)
pool2_size = (2, 2)

conv3_kernel_size = (3, 3)
conv3_filter_size = conv2_filter_size.second => 32
conv3_pad = (1, 1)
pool3_size = (2, 2)

fc1_size = prod(input_size[1:2] .÷ pool1_size .÷ pool2_size .÷ pool3_size) * conv3_filter_size.second

model = Chain(
    Conv(conv1_kernel_size, conv1_filter_size, pad=conv1_pad, relu),
    MaxPool(pool1_size),

    Conv(conv2_kernel_size, conv2_filter_size, pad=conv2_pad, relu),
    MaxPool(pool2_size),

    Conv(conv3_kernel_size, conv3_filter_size, pad=conv3_pad, relu),
    MaxPool(pool3_size),
    
    Flatten(),
    Dense(fc1_size, output_size),
    softmax,
)

model = gpu(model)

model

Chain(Conv((3, 3), 1=>16, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 16=>32, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>32, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Flatten(), Dense(288, 10), softmax)

In [19]:
let
    x1 = gpu(train_data[1][1])
    display(model(x1))
end

10×256 CuArray{Float32,2,Nothing}:
 0.113813   0.120842   0.114023   …  0.120016   0.111879   0.122499 
 0.0843035  0.0855561  0.0909145     0.0869676  0.0846347  0.0869116
 0.0987269  0.0973447  0.0875537     0.0978547  0.0981303  0.0905437
 0.0812439  0.0836764  0.0922618     0.0849064  0.0840095  0.0806501
 0.0957331  0.0863312  0.0926881     0.0853744  0.0964106  0.0890015
 0.116707   0.117006   0.108214   …  0.124676   0.112974   0.121648 
 0.0911451  0.0966948  0.0983504     0.0917298  0.0967833  0.0933034
 0.11153    0.111864   0.116489      0.121178   0.111923   0.11844  
 0.102121   0.0882416  0.0952028     0.0840419  0.0997559  0.0929312
 0.104677   0.112443   0.104302      0.103255   0.1035     0.104072 

In [20]:
import Flux: crossentropy, onecold
import Statistics: mean

function loss_noise(x, y)
    x_aug = x .+ 0.1f0 * gpu(randn(eltype(x), size(x)))
    logits = model[1:end-1](x)
    return logitcrossentropy(logits, y)
end

function accuracy(x, y)
    ŷ = model(x)
    return mean(onecold(cpu(ŷ)) .== onecold(cpu(y)))
end


let
    println("Train")
    
    data1 = gpu(train_data[1])
    loss_ = loss(data1...)
    acc_ = accuracy(data1...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

Train
Loss: 2.318602
Accuracy: 0.09375


In [21]:
import Flux: train!
import BSON: bson
import Printf: @printf

optimizer = ADAM(0.001)

best_acc = 0.0
last_improvement = 0
for epoch_idx in 1:100
    # Train for a single epoch
    for i in 1:length(train_data)
        let data = gpu(train_data[i])
            train!(loss_noise, params(model), [data], optimizer)
        end
    end
    
    # Calculate accuracy:
    acc = let data = gpu(test_data)
        accuracy(data...)
    end
    
    @printf("[%d] Test accuracy: %.4f\n", epoch_idx, acc)
    
    # If our accuracy is good enough, quit out.
    if acc >= 0.999
        println(" -> Early-exiting: We reached our target accuracy of 99.9%")
        break
    end

    # If this is the best accuracy we've seen so far, save the model out
    if acc >= best_acc
        println(" -> New best accuracy! Saving model out to cnn_mnist.bson")
        bson("cnn_mnist.bson", model = cpu(model), epoch = epoch_idx)
        best_acc = acc
        last_improvement = epoch_idx
    end

    # If we haven't seen improvement in 5 epochs, drop our learning rate:
    if epoch_idx - last_improvement >= 5 && optimizer.eta > 1e-6
        optimizer.eta /= 10.0
        println(" -> Haven't improved in a while, dropping learning rate to $(optimizer.eta)!")

        # After dropping learning rate, give it a few epochs to improve
        last_improvement = epoch_idx
    end

    if epoch_idx - last_improvement >= 10
        println(" -> We're calling this converged.")
        break
    end
end

[1] Test accuracy: 0.9508
 -> New best accuracy! Saving model out to cnn_mnist.bson
[2] Test accuracy: 0.9747
 -> New best accuracy! Saving model out to cnn_mnist.bson
[3] Test accuracy: 0.9787
 -> New best accuracy! Saving model out to cnn_mnist.bson
[4] Test accuracy: 0.9813
 -> New best accuracy! Saving model out to cnn_mnist.bson
[5] Test accuracy: 0.9838
 -> New best accuracy! Saving model out to cnn_mnist.bson
[6] Test accuracy: 0.9844
 -> New best accuracy! Saving model out to cnn_mnist.bson
[7] Test accuracy: 0.9837
[8] Test accuracy: 0.9848
 -> New best accuracy! Saving model out to cnn_mnist.bson
[9] Test accuracy: 0.9862
 -> New best accuracy! Saving model out to cnn_mnist.bson
[10] Test accuracy: 0.9868
 -> New best accuracy! Saving model out to cnn_mnist.bson
[11] Test accuracy: 0.9876
 -> New best accuracy! Saving model out to cnn_mnist.bson
[12] Test accuracy: 0.9884
 -> New best accuracy! Saving model out to cnn_mnist.bson
[13] Test accuracy: 0.9883
[14] Test accuracy: 

In [22]:
model = nothing

GC.gc()

In [23]:
import BSON: load

model = load("cnn_mnist.bson")[:model]

model

Chain(Conv((3, 3), 1=>16, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 16=>32, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>32, relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Flatten(), Dense(288, 10), softmax)

In [24]:
let
    println("Test")

    loss_ = loss(test_data...)
    acc_ = accuracy(test_data...)

    println("Loss: ", loss_)
    println("Accuracy: ", acc_)
end

Test
Loss: 0.035116393
Accuracy: 0.9914


## Autoencoder

https://github.com/FluxML/model-zoo/blob/master/vision/mnist/autoencoder.jl

In [25]:
input_size = MNISTspec.input_size
hidden_size = 500
encoded_size = 50

encoder = Chain(
    Flatten(),
    Dense(prod(input_size), hidden_size, leakyrelu),
    Dense(hidden_size, encoded_size, leakyrelu),
)

decoder = Chain(
    Dense(encoded_size, hidden_size, leakyrelu),
    Dense(hidden_size, prod(input_size), leakyrelu),
    Reshape(input_size),
)

model = Chain(encoder, decoder)

model = gpu(model)

model

Chain(Chain(Flatten(), Dense(784, 500, leakyrelu), Dense(500, 50, leakyrelu)), Chain(Dense(50, 500, leakyrelu), Dense(500, 784, leakyrelu), Reshape((28, 28, 1))))

In [26]:
let
    x1 = gpu(train_data[1][1])
    x̂1 = model(x1)
    println("x : type=$(typeof(x1)), size=$(size(x1))")
    println("x̂ : type=$(typeof(x̂1)), size=$(size(x̂1))")
end

x : type=CuArray{Float32,4,Nothing}, size=(28, 28, 1, 256)
x̂ : type=CuArray{Float32,4,CuArray{Float32,2,Nothing}}, size=(28, 28, 1, 256)


In [27]:
function loss(x)
    x̂ = model(x)
    err = x̂ .- x
    N = length(x)
    return sum(err .* err) / N
end

let
    println("Train")
    x1 = gpu(train_data[1][1])
    loss_ = loss(x1)
    println("Loss: ", loss_)
end

let
    println("Test")
    data = gpu(test_data[1])
    loss_ = loss(data)
    println("Loss: ", loss_)
end

Train
Loss: 0.10478948
Test
Loss: 0.10879664


In [28]:
import Flux: train!
import BSON: bson
import Printf: @printf

optimizer = ADAM(0.001)

best_loss = Inf
last_improvement = 0
for epoch_idx in 1:100
    # Train for a single epoch
    for i in 1:length(train_data)
        let data = gpu(train_data[i][1])
            train!(loss, params(model), [(data,)], optimizer)
        end
    end
    
    # Calculate accuracy:
    loss_ = let data = gpu(test_data[1])
        loss(data)
    end
    
    @printf("[%d] Test loss: %.4f\n", epoch_idx, loss_)
    
    # If this is the best accuracy we've seen so far, save the model out
    if loss_ <= best_loss
        println(" -> New best loss! Saving model out to ae_mnist.bson")
        bson("ae_mnist.bson", model = cpu(model), epoch = epoch_idx)
        best_loss = loss_
        last_improvement = epoch_idx
    end

    # If we haven't seen improvement in 5 epochs, drop our learning rate:
    if epoch_idx - last_improvement >= 5 && optimizer.eta > 1e-6
        optimizer.eta /= 10.0
        println(" -> Haven't improved in a while, dropping learning rate to $(optimizer.eta)!")

        # After dropping learning rate, give it a few epochs to improve
        last_improvement = epoch_idx
    end

    if epoch_idx - last_improvement >= 10
        println(" -> We're calling this converged.")
        break
    end
end

[1] Test loss: 0.0119
 -> New best loss! Saving model out to ae_mnist.bson
[2] Test loss: 0.0094
 -> New best loss! Saving model out to ae_mnist.bson
[3] Test loss: 0.0086
 -> New best loss! Saving model out to ae_mnist.bson
[4] Test loss: 0.0079
 -> New best loss! Saving model out to ae_mnist.bson
[5] Test loss: 0.0074
 -> New best loss! Saving model out to ae_mnist.bson
[6] Test loss: 0.0070
 -> New best loss! Saving model out to ae_mnist.bson
[7] Test loss: 0.0068
 -> New best loss! Saving model out to ae_mnist.bson
[8] Test loss: 0.0065
 -> New best loss! Saving model out to ae_mnist.bson
[9] Test loss: 0.0062
 -> New best loss! Saving model out to ae_mnist.bson
[10] Test loss: 0.0061
 -> New best loss! Saving model out to ae_mnist.bson
[11] Test loss: 0.0059
 -> New best loss! Saving model out to ae_mnist.bson
[12] Test loss: 0.0058
 -> New best loss! Saving model out to ae_mnist.bson
[13] Test loss: 0.0057
 -> New best loss! Saving model out to ae_mnist.bson
[14] Test loss: 0.005

In [29]:
model = nothing

GC.gc()

In [30]:
import BSON: load

model = load("ae_mnist.bson")[:model]

model

Chain(Chain(Flatten(), Dense(784, 500, leakyrelu), Dense(500, 50, leakyrelu)), Chain(Dense(50, 500, leakyrelu), Dense(500, 784, leakyrelu), Reshape((28, 28, 1))))

In [31]:
let
    println("Test")
    loss_ = loss(test_data[1])
    println("Loss: ", loss_)
end

Test
Loss: 0.0036881757
