Flux with MNIST and GPU


Libraries

In [None]:
using CUDA

using MLDatasets           # mnist
using PreprocessingImages; pim = PreprocessingImages

using MLDataUtils          # label, nlabel, labelfreq, stratifiedobs
using Flux                 # the julia ml library

using Random
using Plots; gr()

In [None]:
include( expanduser("~/projects/pesquisa/libs/libml.jl") )
include( expanduser("~/projects/pesquisa/libs/misc.jl") )

MNIST

In [None]:
# Load the MNIST training and test splits; each is destructured into its
# image array and its label vector.
trX,   trY   = MNIST(split=:train)[:]
testX, testY = MNIST(split=:test)[:];   # trailing `;` suppresses the cell output

In [None]:
# Split the original training data into training and validation subsets,
# stratified by label. The RNG is seeded first so the split is reproducible.
Random.seed!(1)
(trainX, trainY), (validationX, validationY) = stratifiedobs((trX, trY); p=0.7)

# Show the array sizes of the three sets as the cell result.
map(size, (trainX, validationX, testX))

Preprocessing

In [None]:
"""
    preprocess(X, y)

Prepare a batch of images and labels for the dense network.

# Arguments
- `X`: 3-dimensional image array. Its first two axes are swapped and then
  flattened into one feature axis; the last axis is kept as-is.
- `y`: digit labels, each expected to be one of the classes `0:9`.

# Returns
A tuple `(Xs, ys)`: `Xs` is the flattened 2-D feature matrix and `ys` is the
one-hot encoding of `y` against the classes `0:9`.
"""
function preprocess(X, y)
    # Swap the two image axes, then collapse them into a single feature axis.
    Xs = Flux.flatten(permutedims(X, (2, 1, 3)))

    # Encode the labels as they are: integer labels already compare equal to
    # the `0:9` classes, so converting them to Float32 first only allocates.
    ys = Flux.onehotbatch(y, 0:9)

    return (Xs, ys)
end

# The three dimensions of the training array; `d` is the product of the first
# two, i.e. the number of features per image once it is flattened.
h, v, N = size(trainX)
d       = h * v

# Training set: preprocess on the CPU, then move features and one-hot labels
# to the GPU.
X_c, y_c = preprocess(trainX, trainY)
X_tr     = gpu(X_c)
y_tr     = gpu(y_c)

# Validation set: only the features are moved to the GPU.
# (original author's note: "does not fit on the GPU" — presumably the reason
# the validation labels stay on the CPU; TODO confirm)
X_c, y_c = preprocess(validationX, validationY)
X_val    = gpu(X_c);

Model

In [None]:
# Model configuration: one input per flattened pixel, one output per digit class.
nInputs  = d
nOutputs = 10

# Three dense layers followed by softmax. The whole chain is moved to the GPU
# so that its weights live there.
# (alternative first layer kept from the original: Flux.Dense(nInputs, 256, tanh))
model = gpu(
    Chain(
        Dense(nInputs => 128, relu),
        Dense(128 => 32, relu),
        Dense(32 => nOutputs),
        softmax,
    ),
)

In [None]:
# Loss: mean squared error between the network output and the one-hot targets.
# NOTE(review): MSE on softmax outputs is unusual for classification;
# cross-entropy is the conventional choice — confirm this is intentional.
function lossFunction(X, y)
    return Flux.mse(model(X), y)
end

# Trainable parameters of the model.
modelParameters = Flux.params(model)

# Training data served in batches of 32.
# batchsize=1 => stochastic gradient descent; batchsize > 1 => mini-batch gradient descent
dataset = Flux.DataLoader((X_tr, y_tr); batchsize=32)

# Plain gradient descent with Flux's default learning rate.
modelOptimizer = Flux.Descent();
# callBack       = Flux.throttle(() -> println("."), 10);        # print every 10s

Training

In [None]:
# Upper bound on the number of training epochs run by the training loop.
numberOfEpochs = 1_000

In [None]:
"""
    predictOutcome(X)

Return the predicted digit label for every observation in `X`.

The global `model` is evaluated on `X`, and `Flux.onecold` maps each output
to the label in `0:9` that received the highest score.
"""
function predictOutcome(X)
    # The labels are passed as a materialised Vector, exactly as before.
    # NOTE(review): passing the range `0:9` directly may change where the
    # result lives for GPU inputs — keep the Vector unless verified.
    digitLabels = collect(0:9)
    return Flux.onecold(model(X), digitLabels)
end

Training without per-epoch plotting

In [None]:
# # https://fluxml.ai/Flux.jl/stable/training/training/

# # preferred for multiple epochs
# epochs = 1:numberOfEpochs
# for epoch in epochs
#     Flux.train!(lossFunction, modelParameters, dataset, modelOptimizer; cb=callBack)
# end

Training with per-epoch plotting, using the custom helper function trainModel!

In [None]:
# https://fluxml.ai/Flux.jl/stable/training/training/

epochs   = 1:numberOfEpochs
loss_tr  = Float64[]   # value returned by trainModel! for each epoch
acc_val  = Float64[]   # multiclass F1-score on the validation set for each epoch
minLoss  = 1e-6   # hint: start at 1e-2, then go lower for more epochs (5e-4)
nearZero = minLoss / 25

# The validation labels never change, so coerce them once here instead of
# repeating the conversion on every epoch.
validationLabels = coerce(validationY, OrderedFactor)

for epoch in epochs
    # train: trainModel! is the helper from libml; its result is recorded as the loss
    loss = trainModel!(lossFunction, modelParameters, dataset, modelOptimizer)
    push!(loss_tr, loss)
    # print(".")   # "epoch completed" indicator

    # predict on the validation set and score against the pre-coerced labels
    ŷ  = predictOutcome(X_val)
    f1 = MLJ.multiclass_f1score(ŷ, validationLabels)
    push!(acc_val, f1)

    # exit criteria (currently disabled; minLoss and nearZero are only used here)
    # if stopTrainingCriteria(loss_tr, minLoss, nearZero)   break   end
end

In [None]:
# Release GPU memory: drop every reference to the training and validation
# arrays that were moved to the GPU, then call the cleanCUDA helper.
X_tr = y_tr = X_val = nothing
cleanCUDA()

In [None]:
# Plot the training history: loss in the top panel, validation F1-score below.
# `:log10` / `:none` are passed through to plotVector (presumably the axis
# scale — confirm against the helper).
s  = (500, 500)   # size given to both plotVector calls and to the combined figure
p1 = plotVector(loss_tr, s, :log10, "Loss function")
p2 = plotVector(acc_val, s, :none, "f1-score")
plot(p1, p2; layout=(2, 1), size=s)

Testing

In [None]:
# Preprocess the test set and move its features to the GPU.
X_c, y_c = preprocess(testX, testY)
X_tst    = gpu(X_c)

# Predict a digit label for every test observation.
ŷ = predictOutcome(X_tst)

# Report the metrics against the true labels, coerced to an ordered factor.
printMetrics(ŷ, coerce(testY, OrderedFactor))

In [None]:
# clean gpu memory: drop the reference to the test features that were moved
# to the GPU, then call the cleanCUDA helper
X_tst = nothing
cleanCUDA()