### Flux with MNIST and GPU


### Libraries

In [None]:
using CUDA
using Flux              # dense, softmax, sigmoid
using MLJ               # make_blobs, rmse, confmat, f1score, coerce
using MLDataUtils       # label, nlabel, labelfreq, stratifiedobs
using MLDatasets        # mnist
using Random
using Plots; gr()

### Functions

In [None]:
# metrics
"""
    printMetrics(ŷ, y)

Display the confusion matrix of predictions `ŷ` against targets `y`, then print the
accuracy and the multiclass F1-score, each rounded to three digits.
"""
function printMetrics(ŷ, y)
    display(confmat(ŷ, y))

    # Metrics are evaluated and printed one at a time, in this order.
    for (label, metric) in (("accuracy: ", accuracy), ("f1-score: ", multiclass_f1score))
        println(label, round(metric(ŷ, y); digits=3))
    end

    return nothing
end

### Dataset

In [None]:
# Load MNIST through MLDatasets: the training split is kept under the `train*_original`
# names, and the test split is stored under the `validation*_original` names.
(trainX_original, trainY_original) = MNIST.traindata()
(validationX_original, validationY_original) = MNIST.testdata();

In [None]:
# Split the original training data into train/test subsets with `stratifiedobs`
# (p = 0.7 for the first subset); the validation arrays are copies of the originals.
Random.seed!(1)   # seed the global RNG before splitting
(trainX, trainY), (testX, testY) = stratifiedobs(
    (trainX_original, trainY_original); p=0.7
)
validationX = copy(validationX_original)
validationY = copy(validationY_original)

# shapes of the three feature arrays
map(size, (trainX, testX, validationX))

### Preprocessing

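Flux primarily works with matrices and vectors, so the image arrays are flattened into a matrix and the labels are one-hot encoded before training.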

In [None]:
"""
    preprocess(X, y)

Prepare raw inputs for the model: `X` is converted to `Float32` and passed through
`Flux.flatten`; `y` is converted to `Float32` and one-hot encoded against the labels
`0:9` with `Flux.onehotbatch`.

Returns the tuple `(Xs, ys)`.
"""
function preprocess(X, y)
    features = Flux.flatten(Float32.(X))
    targets = Flux.onehotbatch(Float32.(y), 0:9)
    return features, targets
end

# First two dimensions of the training array and its third dimension (N);
# d is the product of the first two.
h, v, N = size(trainX)
d = h * v

# Preprocessed training features and one-hot targets.
(X, y) = preprocess(trainX, trainY);

In [None]:
# Sanity check: d (= h * v), N, and the sizes of the preprocessed X and y.
d, N, size(X), size(y)

### Model

In [None]:
# data on gpu: pass the preprocessed features and targets through Flux's `gpu`
X_d = gpu(X)
y_d = gpu(y);

In [None]:
# model configuration
nInputs = d
nOutputs = 10

# One dense layer (tanh activation) followed by softmax; the chain is passed through
# `gpu` so its weights live on the gpu.
model = gpu(Chain(Flux.Dense(nInputs, nOutputs, tanh), softmax))

# Mean squared error between the model output and the targets.
function lossFunction(X, y)
    return Flux.mse(model(X), y)
end

modelParameters = Flux.params(model)

# Batches of 256 observations (DataLoader's default batchsize is 1).
data = Flux.DataLoader((X_d, y_d); batchsize=256)

# Throttled callback: prints at most once every 10s.
callBack = Flux.throttle(() -> println("training"), 10);

### Train

In [None]:
# custom made function that provides access to loss function outcome
"""
    trainModel!(loss, ps, data, opt)

Run one pass over `data`, updating the parameters `ps` with the optimiser `opt` after
every batch, and return the mean of the per-batch losses.

# Arguments
- `loss`: callable invoked as `loss(batch...)` for each element of `data`.
- `ps`: parameters handed to `gradient` and to `Flux.update!`.
- `data`: iterable of batches; each batch is splatted into `loss`.
- `opt`: optimiser handed to `Flux.update!`.

# Returns
The mean of the batch losses, each recorded from the forward pass that precedes the
corresponding parameter update.
"""
function trainModel!(loss, ps, data, opt)
    dataLosses = Float32[]

    for batch in data
        # Capture the loss from inside the differentiated closure so the forward pass
        # runs once per batch instead of once for logging and again for the gradient.
        local batchLoss
        gs = gradient(ps) do
            batchLoss = loss(batch...)
            return batchLoss
        end
        Flux.update!(opt, ps, gs)

        push!(dataLosses, batchLoss)
    end

    return mean(dataLosses)
end

In [None]:
# Time one pass over the data; the optimiser is constructed inside the timed region.
@time let optimiser = Flux.Descent()
    trainModel!(lossFunction, modelParameters, data, optimiser)
end

In [None]:
# number of training epochs (each epoch is one call to trainModel! over `data`)
numberOfEpochs = 100;

In [None]:
# Train for `numberOfEpochs` passes, appending each epoch's mean batch loss as it finishes.
epochs = 1:numberOfEpochs
epochLosses = Float32[]

foreach(epochs) do _
    push!(epochLosses, trainModel!(lossFunction, modelParameters, data, Flux.Descent()))
end

In [None]:
# loss curve: mean training loss recorded for each epoch
plot(epochs, epochLosses; linewidth=2, legend=false, size=(300, 200))

### Predict

In [None]:
"""
    predictOutcome(X)

Evaluate the global `model` on `X` and decode its output with `Flux.onecold` against
the labels `0, 1, …, 9`, returning the decoded labels.
"""
function predictOutcome(X)
    scores = model(X)
    return Flux.onecold(scores, collect(0:9))
end

# Predict on the training features, then report metrics against the training labels;
# both sides are coerced to OrderedFactor before being compared.
ŷ = predictOutcome(X_d)
let predicted = coerce(ŷ, OrderedFactor), actual = coerce(trainY, OrderedFactor)
    printMetrics(predicted, actual)
end