Flux with PCA
* Runs on CPU only, no CUDA call
* Multithreading, if any, is handled internally by the Flux library implementation
* SGD (one data point per gradient update, i.e. batch size 1)

Libraries

In [None]:
using MLDatasets           # mnist
using PreprocessingImages; pim = PreprocessingImages

using MLJ                  # make_blobs, rmse, confmat, f1score, coerce
using MLDataUtils          # label, nlabel, labelfreq, stratifiedobs
using Flux                 # the julia ml library

using Random
using DataFrames

In [None]:
include( expanduser("~/projects/pesquisa/libs/libml.jl") )

MNIST

In [None]:
# load mnist (the MNIST dataset type comes from MLDatasets)
# each split is destructured into its predictors (trX / testX) and its labels (trY / testY)
trX,   trY   = MNIST(:train)[:]
testX, testY = MNIST(:test)[:];   # trailing `;` suppresses the notebook cell output

In [None]:
# carve a stratified validation subset out of the MNIST training split (p = 0.7 is the
# share kept for training); the test set was loaded separately and is only sized here
Random.seed!(1)   # fixed seed so the random split is reproducible
(trainX, trainY), (validationX, validationY) = stratifiedobs((trX, trY); p = 0.7)
map(size, (trainX, validationX, testX))

Preprocessing

In [None]:
# MLJ works with dataframes
"""
    preprocess1(X, y)

Prepare a batch of images `X` and their labels `y` for MLJ.

The first two axes of the 3-D array `X` are swapped (needed to adjust the visualization
of the digits) and the result is handed to `pim.batchImage2DF` (presumably producing a
DataFrame; confirm in PreprocessingImages). The labels are converted element-wise to
`Int32`.

Returns the tuple `(table, labels)`.
"""
function preprocess1(X, y)
    swapped = permutedims(X, (2, 1, 3))   # swap axes 1 and 2, axis 3 stays in place
    table   = pim.batchImage2DF(swapped)
    return (table, Int32.(y))
end

# sizes of the three axes of the training array; d is the product of the first two
h, v, N = size(trainX)
d       = h * v

# first preprocessing stage on the training split, then show the resulting type and shapes
X_c, y_c = preprocess1(trainX, trainY)
(typeof(X_c), size(X_c), size(y_c))

In [None]:
# step 1: reduce predictors with PCA (model code loaded from MultivariateStats)
PCA     = @load PCA pkg=MultivariateStats verbosity=0
reducer = PCA(pratio = 0.95)

# step 2: standardize predictors
std = Standardizer()

# chain both steps, fit them on the training table, then apply them to that same table
pipe = @pipeline reducer std
mach = fit!(MLJ.machine(pipe, X_c))
X_c  = MLJ.transform(mach, X_c) .|> Float32   # transform (unsupervised) vs predict (supervised)

# N and d now describe the reduced table: first and second dimension of X_c
N, d = size(X_c)
(typeof(X_c), size(X_c))

In [None]:
# Flux works with matrices and vectors
"""
    preprocess2(X, y)

Convert the table `X` and the labels `y` into the inputs used with Flux.

`X` is passed through `df2FluxMatrix` (defined outside this notebook, presumably in
libml.jl; confirm). `y` is converted to `Float32` and one-hot encoded against the
labels `0:9`.

Returns the tuple `(features, targets)`.
"""
function preprocess2(X, y)
    features = df2FluxMatrix(X)
    targets  = Flux.onehotbatch(Float32.(y), 0:9)
    return features, targets
end

# second preprocessing stage on the reduced training data, then show the type and shapes
X_c, y_c = preprocess2(X_c, y_c)
(typeof(X_c), size(X_c), size(y_c))

Model

In [None]:
# model configuration
nInputs, nOutputs = d, 10   # d predictors in, one output per digit label 0:9

# a single dense layer; tanh is chosen as nonlinearity (Prof Mostafa lecture),
# softmax then scales the output to sum to one
model = Chain(
    Flux.Dense(nInputs, nOutputs, tanh),
    softmax,
)

# mean squared error between the model output and the targets
function lossFunction(X, y)
    return Flux.mse(model(X), y)
end

modelParameters = Flux.params(model)

# batchsize=1 => one sample per gradient step (SGD); batchsize > 1 => mini-batches
dataset = Flux.DataLoader((X_c, y_c); batchsize = 1)

modelOptimizer = Flux.Descent()

# progress callback, throttled so that it prints at most every 10s
callBack = Flux.throttle(() -> println("."), 10);

Training

In [None]:
# maximum number of epochs; the training loop below may stop earlier on its exit criteria
numberOfEpochs = 5

Training method without plotting epochs

In [None]:
# # preferred for multiple epochs
# epochs = 1:numberOfEpochs
# for epoch in epochs
#     Flux.train!(lossFunction, modelParameters, dataset, modelOptimizer; cb=callBack)
# end

Training method with plotting, with the aid of homemade function trainModel!

In [None]:
# https://fluxml.ai/Flux.jl/stable/training/training/

epochs     = 1:numberOfEpochs
lossVector = Float64[]   # one loss value is appended per completed epoch
minLoss    = 1e-5        # hint: start at 1e-2, then go lower for more epochs (5e-4)
nearZero   = minLoss / 25

for epoch in epochs
    # run one epoch through the libml helper and record the loss it returns
    loss = trainModel!(lossFunction, modelParameters, dataset, modelOptimizer)
    push!(lossVector, loss)
    print(".")   # "epoch completed" indicator

    # leave the loop as soon as the libml exit criteria are satisfied
    stopTrainingCriteria(lossVector, minLoss, nearZero) && break
end

In [None]:
# plot training: draws the per-epoch losses collected in lossVector
# NOTE(review): plotLoss is not defined in this notebook (presumably libml.jl);
# (500,300) is presumably the plot size — confirm against its definition
plotLoss(lossVector, (500,300))

Validation

In [None]:
"""
    predictOutcome(X)

Run the global `model` on `X` and decode its output with `Flux.onecold` against the
labels 0 through 9, returning the decoded labels.
"""
function predictOutcome(X)
    scores = model(X)
    return Flux.onecold(scores, collect(0:9))
end

In [None]:
# preprocessing: run the validation split through the same steps as the training data.
# The already-fitted machine `mach` is reused (transform only, no refit), and the result
# is cast to Float32 exactly as the training features were, so the model receives
# validation input of the same element type it was trained on.
# Note: X_c and y_c are overwritten here and no longer hold the training data.
X_c, y_c = preprocess1(validationX, validationY)
X_c      = MLJ.transform(mach, X_c) .|> Float32
X_c, y_c = preprocess2(X_c, y_c)

# predict
ŷ = predictOutcome(X_c)

# result: compare the predictions with the original validation labels
printMetrics( ŷ, coerce(validationY, OrderedFactor) )