# Flux with MNIST


## Libraries

In [None]:
using Flux              # the julia ml library
using MLJ               # make_blobs, rmse, confmat, f1score, coerce
using MLDataUtils       # label, nlabel, labelfreq, stratifiedobs
using MLDatasets        # mnist
using Random
using Plots; gr()

## Functions

In [None]:
# metrics
"""
    printMetrics(ŷ, y)

Display the confusion matrix of the predictions `ŷ` against the ground truth `y`,
then print the accuracy and the multiclass F1-score, each rounded to 3 digits.
"""
function printMetrics(ŷ, y)
    display(confmat(ŷ, y))

    acc = round(accuracy(ŷ, y); digits=3)
    println("accuracy: ", acc)

    f1 = round(multiclass_f1score(ŷ, y); digits=3)
    println("f1-score: ", f1)

    return nothing
end


## Dataset

In [None]:
# load mnist from MLDatasets
# Each call is destructured into a (features, labels) pair. The MNIST test split is
# kept aside under the name "validation"; the "test" set used later is carved out of
# the training split instead.
trainX_original,      trainY_original      = MNIST.traindata()
validationX_original, validationY_original = MNIST.testdata();


In [None]:
# split trainset, testset, validation set
Random.seed!(1)   # fixed seed so the split is reproducible

# stratified split of the original training data: p = 0.7 goes to the train set,
# the remainder becomes the test set
(trainX, trainY), (testX, testY) = stratifiedobs(
    (trainX_original, trainY_original); p=0.7
)

# the MNIST test split is copied unchanged into the validation set
validationX = copy(validationX_original)
validationY = copy(validationY_original)

# shapes of the three feature arrays
size(trainX), size(testX), size(validationX)

## Preprocessing

Flux primarily works with matrices and vectors, so the images are flattened into columns and the labels are one-hot encoded.

In [None]:
"""
    preprocess(X, y; labels=0:9)

Convert raw images and class labels into the arrays used for training.

# Arguments
- `X`: image array whose last dimension indexes the observations.
- `y`: class labels, one per observation.

# Keywords
- `labels`: the admissible classes, in the order of the one-hot rows.
  Defaults to the ten digits `0:9`.

# Returns
A tuple `(Xs, ys)` where `Xs` is `X` converted to `Float32` and flattened by
`Flux.flatten`, and `ys` is the one-hot encoding of `y` over `labels`.
"""
function preprocess(X, y; labels=0:9)
    Xs = Flux.flatten(Float32.(X))
    # Labels are class identifiers, not numbers to compute with: encode them directly
    # instead of casting to Float32 first (which would also fail for non-numeric labels).
    ys = Flux.onehotbatch(y, labels)

    return (Xs, ys)
end

# the two image dimensions and the number of training observations
h, v, N = size(trainX)
# number of pixels per image, i.e. the length of one flattened input
d = h * v
X, y = preprocess(trainX, trainY);

In [None]:
# sanity check: pixel count, observation count and shapes of the preprocessed arrays
d, N, size(X), size(y)

### tanh

In [None]:
# tanh sampled at the integers from -5 to 5
plot(-5:5, tanh.(-5:5), size=(300,200), linewidth=2, legend=false)


### Model

Stochastic Gradient Descent (SGD) means batchsize=1.

The error is computed for each individual data point, and the weights are then updated in the direction of the negative gradient.

For this reason, the computation is done on the CPU (sequential computation over one data point at a time).

In [None]:
# model configuration
# one input per pixel of a flattened image, one output per digit class
nInputs  = d
nOutputs = 10
# single dense layer followed by softmax
model = Chain( Flux.Dense(nInputs, nOutputs, tanh),   # tanh is chosen as nonlinearity (Prof Mostafa lecture)
               softmax )                              # softmax scales the output to sum to one

# loss: mean squared error between the model output and the one-hot targets
lossFunction(X, y) = Flux.mse( model(X), y )
# parameters of `model` that are passed to Flux.train!
modelParameters    = Flux.params(model)
# iterates over (X, y) one observation at a time
data               = Flux.DataLoader((X, y), batchsize=1)             # default batchsize=1
# progress message, throttled so it fires at most once per 10 seconds
callBack           = Flux.throttle(() -> println("training"), 10);    # print every 10s


### Train method 1

In [None]:
# number of passes over the data made by the multi-epoch training loop
numberOfEpochs = 10;

In [None]:
# Flux.train!(loss, params, data, opt; cb)
# One pass over `data` with plain gradient descent; no callback is passed here.
@time Flux.train!(lossFunction, modelParameters, data, Flux.Descent())   # single epoch, η = 0.1 (default)
# cpu time (batchsize=1): 3-4s

In [None]:
# Multiple epochs with the @epochs macro. The epoch count comes from `numberOfEpochs`
# rather than a hard-coded literal, so this cell stays in step with the explicit loop.
Flux.@epochs numberOfEpochs Flux.train!(lossFunction, modelParameters, data, Flux.Descent(); cb=callBack);   # multiple epochs

In [None]:
# preferred for multiple epochs
epochs = 1:numberOfEpochs
@time for epoch in epochs
    # one full pass over `data` per epoch
    Flux.train!(lossFunction, modelParameters, data, Flux.Descent(); cb=callBack)
end

### Predict

In [None]:
"""
    predictOutcome(X)

Run the global `model` on `X` and return, for each observation, the digit (0-9)
whose output is the largest.
"""
function predictOutcome(X)
    scores = model(X)
    classes = collect(0:9)
    return Flux.onecold(scores, classes)
end

ŷ = predictOutcome(X);


In [None]:
# coerce predictions and true labels to ordered factors before computing the metrics
printMetrics( coerce(ŷ, OrderedFactor), coerce(trainY, OrderedFactor) )

### Demo ML

Press CTRL-ENTER on the cell to draw a new random sample.

In [None]:
# draw 8 random training indices (repeats are possible)
v = rand(1:N, 8)
# show the corresponding digit images ...
display(map(i -> MNIST.convert2image(trainX[:, :, i]), v))
# ... and the model's predictions for them, as a row
ŷ[v]'