#### PCA + Neural Network Classifier with MNIST

##### Libraries

In [None]:
using MLDatasets           # mnist
using Images
using PreprocessingImages; pim = PreprocessingImages
using PreprocessingArrays; pa  = PreprocessingArrays

using MLJ                  # make_blobs, rmse, confmat, categorical
using MLDataUtils          # label, nlabel, labelfreq
using MLJFlux

using Metrics              # r2-score
using Random
using Plots; gr()
using StatsPlots
using DataFrames

##### Functions

In [None]:
"""
    printMetrics(ŷ, y)

Display the confusion matrix of the predictions `ŷ` against the ground truth `y`,
then print the accuracy and the multiclass F1 score, each rounded to three digits.
Returns `nothing`.
"""
function printMetrics(ŷ, y)
    cm = confmat(ŷ, y)
    display(cm)

    acc = round(accuracy(ŷ, y); digits=3)
    println("accuracy: ", acc)

    f1 = round(multiclass_f1score(ŷ, y); digits=3)
    println("f1-score: ", f1)

    return nothing
end


##### MNIST

In [None]:
# load mnist
# `MNIST(split)[:]` is destructured into (features, targets) for each split;
# the `:test` split is used as the held-out validation set in this notebook.
datasetX,    datasetY    = MNIST(:train)[:]
validationX, validationY = MNIST(:test)[:]

display( size(datasetX) )

# Take the first five samples (the third axis indexes samples, matching datasetY[1:5]).
img  = datasetX[:, :, 1:5]
# Swap the two image axes before display — presumably because the stored layout is
# transposed relative to what the image viewer expects; TODO confirm.
img2 = permutedims(img, (2, 1, 3))

# Show the five labels as a row, then the five digits side by side in one row.
display(datasetY[1:5]')
mosaicview( Gray.(img2)  ; nrow=1)

In [None]:
# split trainset, testset from dataset
# Seed the global RNG first so that the split below is reproducible across runs.
Random.seed!(1)
# Stratified split of (features, labels); with p = 0.7 the first part is presumably
# ~70% of the observations with class proportions preserved — confirm in MLDataUtils docs.
(trainX, trainY), (testX, testY) = stratifiedobs((datasetX, datasetY), p = 0.7)
# Cell output: array sizes of the three feature sets.
size(trainX), size(testX), size(validationX)

##### Preprocessing

In [None]:
"""
    preprocess(X, y)

Convert the image batch `X` with `pim.batchImage2DF` (presumably into a DataFrame,
judging by the helper's name) and coerce the labels `y` to `OrderedFactor`.
Returns the tuple `(features, labels)`.
"""
function preprocess(X, y)
    # No `coerce!` on the features: all scitypes are already Continuous in this example.
    features = pim.batchImage2DF(X)
    labels = coerce(y, OrderedFactor)
    return features, labels
end

X_tr, y_tr = preprocess(trainX, trainY);

In [None]:
# Inspect the scientific type of the preprocessed training features.
scitype(X_tr)

In [None]:
# Inspect the scientific type of the coerced training labels.
scitype(y_tr)

##### Pipe the model

In [None]:
# Search the model registry for "PCA" and show the second match.
# NOTE(review): the hard-coded index 2 depends on the registry contents — confirm it
# still points at the intended model.
models("PCA")[2]

In [None]:
# reduce predictors
# Load the PCA model type from MultivariateStats (quietly) and bind it to `PCA`.
PCA = @load PCA pkg=MultivariateStats verbosity=0
# pratio = 0.95: presumably the fraction of variance the retained components must
# explain — TODO confirm against the PCA model documentation for the installed version.
reducer = PCA(pratio = 0.95)

In [None]:
# standardize predictors
# NOTE(review): `std` is also the name of `Statistics.std`; this global binding may hide
# or conflict with that function if it is in scope — consider renaming (the pipeline
# cell refers to this variable by name).
std = Standardizer();

In [None]:
# Search the model registry for "Neural" and show the second match.
# NOTE(review): the hard-coded index 2 depends on the registry contents — confirm it
# still points at the intended model.
models("Neural")[2]

When the next cell runs, you may see "CUDA artifacts" being downloaded, even if the CUDA package is already installed in Julia and the CUDA libraries are present on the computer. This is yet another disadvantage of MLJ compared with Flux.

In [None]:
# nnet
# Load the classifier type from MLJFlux (quietly) and bind it to a local name.
NeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux verbosity=0
# All hyperparameters are left at their defaults except `acceleration`;
# CUDALibs() presumably requests GPU training and needs a working CUDA setup — confirm.
nnet = NeuralNetworkClassifier(acceleration=CUDALibs())

In [None]:
# Compose the three models, in argument order: reducer, then std, then nnet.
# The classifier stage is later addressed as `pipe.neural_network_classifier`.
# NOTE(review): `@pipeline` belongs to older MLJ releases — confirm the pinned version.
pipe = @pipeline reducer std nnet

##### Training


In [None]:
# Set the number of training epochs on the classifier stage before fitting.
pipe.neural_network_classifier.epochs = 1   # = 1 for debug, default = 10
# Bind the pipeline to the training data and fit it; `mach` holds the fitted machine.
mach = MLJ.machine(pipe, X_tr, y_tr) |> fit!

In [None]:
# # save model
# MLJ.save("pca-nn-trained-model.jlso", mach)

In [None]:
# inspect model
# Learned parameters and training report of the fitted machine; the trailing
# semicolons suppress the cell output. `rp` is used by the loss plot below.
fp = fitted_params(mach);
rp = report(mach);

In [None]:
# Training-loss curve of the neural-network stage of the fitted pipeline.
losses = rp.neural_network_classifier.training_losses
# Kept as a notebook global for other cells; no longer used to build the axis.
epochs = pipe.neural_network_classifier.epochs
# Build the x-axis from the recorded losses themselves rather than from the mutable
# `epochs` hyperparameter: if `pipe` is edited after `mach` was fitted (or cells are
# run out of order), `0:epochs` no longer matches `length(losses)` and `plot` fails.
# The axis starts at 0, as in the original `0:epochs`, so the first point is taken
# to be the loss before any training epoch.
plot(0:(length(losses) - 1), losses; title="Error function", size=(500,300), linewidth=2, legend=false)
xlabel!("Epochs")
ylabel!("Cross-entropy loss")

##### Testing

In [None]:
# Apply the same preprocessing to the test split, predict the most probable class
# with the fitted pipeline, and report the metrics.
X_ts, y_ts = preprocess(testX, testY);
ŷ = predict_mode(mach, X_ts)
printMetrics(ŷ, y_ts)

##### Validation

In [None]:
# Same evaluation on the held-out MNIST `:test` split loaded earlier as validationX/Y.
# Note: `ŷ` is rebound here and no longer holds the test-split predictions.
X_v, y_v = preprocess(validationX, validationY)
ŷ = predict_mode(mach, X_v)
printMetrics(ŷ, y_v)