#### Flux with MNIST and GPU


##### Libraries

In [None]:
using CUDA

using MLDatasets           # mnist
using Images
using PreprocessingImages; pim = PreprocessingImages
using PreprocessingArrays; pa  = PreprocessingArrays

using MLJ                  # make_blobs, rmse, confmat, f1score, coerce
using MLDataUtils          # label, nlabel, labelfreq, stratifiedobs
using Flux                 # the julia ml library

using Plots
using Random
using DataFrames

In [None]:
include("../libs/libml.jl")

##### MNIST

In [None]:
# Load MNIST: the :train split is the working dataset, the :test split is kept
# aside as the validation set.
datasetX, datasetY = MNIST(:train)[:]
validationX, validationY = MNIST(:test)[:]

# Show the dimensions of the training image array.
display(size(datasetX))

# Preview the first five images; the first two axes are swapped before
# rendering (presumably so the digits appear upright — confirm).
img = datasetX[:, :, 1:5]
img2 = permutedims(img, (2, 1, 3))

# Labels of the previewed images, shown as a row, followed by the images.
display(adjoint(datasetY[1:5]))
mosaicview(Gray.(img2); nrow=1)

In [None]:
# Stratified split of the dataset into a training part and a test part
# (p = 0.7). The seed is fixed first, presumably to make the split repeatable.
Random.seed!(1)
(trainX, trainY), (testX, testY) =
    MLDataUtils.stratifiedobs((datasetX, datasetY); p=0.7)

# Shapes of the three image sets, shown as the cell result.
map(size, (trainX, testX, validationX))

##### Preprocessing

Flux primarily works with matrices and vectors, so each image is flattened into a column and the labels are one-hot encoded.

In [None]:
"""
    preprocess(X, y)

Prepare images `X` and labels `y` for the network.

`X` is converted to `Float32` and passed through `Flux.flatten`, which
collapses every dimension except the last into one, giving one column per
sample. `y` is one-hot encoded against the digit classes `0:9`.

Returns the tuple `(Xs, ys)`.
"""
function preprocess(X, y)
    Xs = Flux.flatten(Float32.(X))

    # Labels are encoded as they are: converting them to Float32 first only
    # allocated a copy and compared floats against the integer range 0:9.
    ys = Flux.onehotbatch(y, 0:9)

    return (Xs, ys)
end

# First two image dimensions and number of training samples; d is the length
# of one flattened image.
h, v, N = size(trainX)
d = h * v

# Flattened training inputs and one-hot encoded training targets.
tr_X, tr_y = preprocess(trainX, trainY)

# Sanity check of the resulting sizes, shown as the cell result.
(d, N, size(tr_X), size(tr_y))

##### Model

In [None]:
# Move the training inputs and targets to the GPU.
X_d = gpu(tr_X)
y_d = gpu(tr_y);

In [None]:
# Model configuration: one input per pixel, one output per digit class.
nInputs = d
nOutputs = 10

# Earlier variant, kept for reference:
# model = Chain( Flux.Dense(nInputs, 256, tanh),
#                Flux.Dense(256, nOutputs, relu),
#                softmax ) |> gpu

# Two hidden relu layers, a linear output layer and a final softmax; wrapping
# the chain in gpu(...) puts the weights on the GPU.
model = gpu(
    Chain(
        Dense(nInputs => 128, relu),
        Dense(128 => 32, relu),
        Dense(32 => nOutputs),
        softmax,
    ),
)

In [None]:
# Loss: mean squared error between the model output and the one-hot targets.
# NOTE(review): the model ends in softmax, for which cross-entropy is the
# conventional loss; mse is kept here so behaviour does not change.
function lossFunction(X, y)
    return Flux.mse(model(X), y)
end

# Trainable parameters of the model.
modelParameters = Flux.params(model)

# batchsize=1 => one sample per update (per-sample SGD);
# batchsize>1 => mini-batch gradient descent.
data = Flux.DataLoader((X_d, y_d); batchsize=1)

# Plain gradient descent with its default learning rate.
modelOptimizer = Flux.Descent()

# Progress message, throttled to print at most once every 10 s.
callBack = Flux.throttle(() -> println("training"), 10);

##### Training

In [None]:
# Maximum number of training epochs.
numberOfEpochs = 500

Training method that does not record the loss per epoch (kept for reference, commented out)

In [None]:
# # https://fluxml.ai/Flux.jl/stable/training/training/

# # preferred for multiple epochs
# epochs = 1:numberOfEpochs
# for epoch in epochs
#     Flux.train!(lossFunction, modelParameters, data, modelOptimizer; cb=callBack)
# end

Training epoch by epoch while recording the loss for plotting, with the aid of the home-made function `trainModel!`

In [None]:
# Reference: https://fluxml.ai/Flux.jl/stable/training/training/

epochs = 1:numberOfEpochs
lossVector = Float64[]   # one loss value per completed epoch
minLoss = 1e-3           # hint: start at 1e-2, then go lower for more epochs (1e-5)

for epoch in epochs
    # Train one epoch with the libml helper and record the loss it returns.
    epochLoss = trainModel!(lossFunction, modelParameters, data, modelOptimizer)
    push!(lossVector, epochLoss)

    # One dot per completed epoch as a progress indicator.
    print(".")

    # Leave the loop early once the libml stopping rule is satisfied.
    stopTrainingCriteria(lossVector, minLoss) && break
end

In [None]:
# Plot the recorded loss per epoch on a logarithmic y axis.
# Alternative helper, currently unused: plotTrainingEvolution(epochLosses, deltaLosses)
Plots.plot(
    eachindex(lossVector),
    lossVector;
    size=(400, 300),
    linewidth=2,
    legend=false,
    yaxis=:log,
    title="Loss function",
)

##### Testing

In [None]:
"""
    predictOutcome(X)

Run the global `model` on `X` and decode each output column with
`Flux.onecold` into a digit label taken from `0:9`.
"""
function predictOutcome(X)
    scores = model(X)

    # NOTE(review): the labels are deliberately passed as a materialised
    # vector; switching to the range 0:9 may change where the result lives
    # when `scores` is a GPU array — confirm before simplifying.
    return Flux.onecold(scores, collect(0:9))
end

In [None]:
# Preprocess the test split the same way as the training data, then move the
# inputs to the GPU. This rebinds the global X_d.
ts_X, ts_y = preprocess(testX, testY)
X_d = gpu(ts_X)

# Predicted digit label for every test sample.
ŷ = predictOutcome(X_d)

# Compare predictions with the true labels, coerced to an ordered factor,
# using the libml helper.
printMetrics(ŷ, coerce(testY, OrderedFactor))