## Flux with PCA

Under development

## Libraries

In [1]:
#using CUDA

using Flux              # the julia ml library
using Images            # image processing and machine vision for julia

using MLJ               # make_blobs, rmse, confmat, f1score, coerce
#using MLJFlux           # NeuralNetworkClassifier, CUDALibs
using MLDataUtils       # label, nlabel, labelfreq, stratifiedobs
using MLDatasets        # mnist

using LinearAlgebra     # pinv pseudo-inverse matrix
#using Metrics           # r2-score
using Random
using StatsBase         # standardize (normalization)
using Distributions

using Plots; gr()
using StatsPlots
using Printf

#using CSV
using DataFrames


## Functions

In [2]:
# metrics
"""
    printMetrics(ŷ, y)

Display the confusion matrix of `ŷ` against `y`, then print the accuracy and the
multiclass F1-score of the same pair, each rounded to three digits.
"""
function printMetrics(ŷ, y)
    confusion = confmat(ŷ, y)
    display(confusion)

    acc = round(accuracy(ŷ, y); digits=3)
    println("accuracy: ", acc)

    f1 = round(multiclass_f1score(ŷ, y); digits=3)
    println("f1-score: ", f1)

    return nothing
end


printMetrics (generic function with 1 method)

In [3]:
# Flatten an image into a single vector of Float32 values (32-bits is faster on GPU).
image2Vector(M) = M .|> Float32 |> vec

"""
    batchImage2Vector(imagesArray3D)

Return a vector holding one flattened image (see `image2Vector`) per slice of
`imagesArray3D` along its third dimension.

A 2-D array is accepted as well and is treated as a batch containing a single image.
"""
function batchImage2Vector(imagesArray3D)
    # `@views` hands each slice to `image2Vector` without copying it first;
    # `image2Vector` allocates the converted vector anyway.
    return @views [image2Vector(imagesArray3D[:, :, i]) for i in axes(imagesArray3D, 3)]
end

"""
    batchImage2Matrix(imagesArray3D)

Concatenate the flattened images as columns and return the adjoint of that matrix,
so that every row corresponds to one image and every column to one pixel position.
"""
function batchImage2Matrix(imagesArray3D)
    pixelsByImage = reduce(hcat, batchImage2Vector(imagesArray3D))
    return adjoint(pixelsByImage)
end

"""
    batchImage2DF(imagesArray3D)

Build a `DataFrame` from the matrix produced by `batchImage2Matrix`, with
automatically generated column names.
"""
batchImage2DF(imagesArray3D) = DataFrame(batchImage2Matrix(imagesArray3D), :auto)


batchImage2DF (generic function with 1 method)

## Dataset

In [4]:
# load mnist from MLDatasets
# Each call returns an (images, labels) pair; the images are stacked along the
# third dimension (28 x 28 x N).
# Naming note: the official MNIST test split is used below as the *validation* set.
trainX_original,      trainY_original      = MNIST.traindata()
validationX_original, validationY_original = MNIST.testdata();


In [5]:
# split trainset, testset, validation set
#   train / test : 70 / 30 split of the MNIST training data (via `stratifiedobs`)
#   validation   : copy of the MNIST test data, untouched by the split
# The seed is fixed first, presumably so the split is reproducible between runs.
Random.seed!(1)
(trainX, trainY), (testX, testY) = stratifiedobs((trainX_original, trainY_original), p = 0.7)
validationX = copy(validationX_original); validationY = copy(validationY_original)

# show the size of each image batch as the cell output
size(trainX), size(testX), size(validationX)

((28, 28, 42001), (28, 28, 17999), (28, 28, 10000))

## Preprocessing


In [6]:
"""
    preprocess(X)

Turn the image batch `X` into a `DataFrame` by delegating to `batchImage2DF`.
"""
function preprocess(X)
    # No scitype coercion is applied: all columns are already Continuous in this example.
    return batchImage2DF(X)
end

# Training predictors as a table: one row per training image, one column per pixel.
X = preprocess(trainX);

In [7]:
# reduce predictors (PCA configured with pratio = 0.95)
PCA = @load PCA pkg=MultivariateStats verbosity=0
reducer = PCA(pratio = 0.95)

# standardize predictors
# Deliberately not named `std`: a global with that name shadows the `std` function
# exported by the statistics packages loaded above, and the assignment itself fails
# once `std` has already been used in this session.
standardizer = Standardizer()

# execute: fit the PCA -> Standardizer pipeline on the training table, then apply it
pipe = @pipeline reducer standardizer
mach = MLJ.machine(pipe, X) |> fit!
X_til = MLJ.transform(mach, X);

┌ Info: Training Machine{Pipeline290,…}.
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:464
┌ Info: Training Machine{PCA,…}.
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:464
┌ Info: Training Machine{Standardizer,…}.
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:464


In [8]:
"""
    preprocess2(X, y; labels=0:9)

Convert tabular predictors and class labels into the arrays consumed by Flux.

# Arguments
- `X`: table or matrix with one observation per row (anything `Matrix(X)` accepts).
- `y`: class labels, one per row of `X`.

# Keywords
- `labels`: the complete set of classes used for one-hot encoding. Defaults to the
  digits `0:9`, which is what this function previously hard-coded.

# Returns
A tuple `(Xs, ys)` where `Xs` is a `Float32` matrix with one observation per column
and `ys` is the result of `Flux.onehotbatch(y, labels)`.
"""
function preprocess2(X, y; labels=0:9)
    # Transposing puts observations in columns. This is the same matrix the former
    # "collect every row, then `Flux.batch`" sequence produced, minus the per-row
    # temporaries (and minus `Flux.flatten`, which is a no-op on a matrix).
    Xs = permutedims(Float32.(Matrix(X)))

    # Labels are matched against `labels` as they are; converting them to Float32
    # first was unnecessary and would fail for non-numeric labels.
    ys = Flux.onehotbatch(y, labels)

    return (Xs, ys)
end

# NOTE: `X` is rebound here. It no longer refers to the image table built by
# `preprocess`, but to the batched array returned by `preprocess2`.
X, y = preprocess2(X_til, trainY);

### Model

In [9]:
# model configuration
# A single dense layer: one input per column of the reduced table `X_til`,
# one output per class.
nInputs  = size(X_til)[2]
nOutputs = 10
# NOTE(review): tanh bounds every activation to [-1, 1], so the softmax that follows
# can never assign a class more than e / (e + 9/e) ≈ 0.45 probability. Confirm that
# applying tanh before softmax is intended.
model = Chain( Flux.Dense(nInputs, nOutputs, tanh),   # tanh is chosen as nonlinearity (Prof Mostafa lecture)
               softmax )                              # softmax scales the output to sum to one

# Loss: mean squared error between the model output and the targets `y`.
# `modelParameters` are the trainable parameters of `model`; `data` iterates over the
# (X, y) pair one observation at a time.
lossFunction(X, y) = Flux.mse( model(X), y )
modelParameters    = Flux.params(model)
data               = Flux.DataLoader((X, y), batchsize=1)             # default batchsize=1
callBack           = Flux.throttle(() -> println("training"), 10);    # print every 10s


In [10]:
# number of passes over `data` made by the training loop below
numberOfEpochs = 10;

In [11]:
# preferred for multiple epochs
# Each `Flux.train!` call is one pass over `data`; the whole run is timed as a unit.
epochs = 1:numberOfEpochs
@time for _ in epochs
    Flux.train!(lossFunction, modelParameters, data, Flux.Descent(); cb=callBack)
end

training
training
training
 43.942507 seconds (132.53 M allocations: 11.337 GiB, 2.92% gc time, 54.11% compilation time)


### Predict

In [12]:
"""
    predictOutcome(X; labels=0:9)

Run the global `model` on `X` and decode its output with `Flux.onecold`, returning one
element of `labels` per observation.

# Keywords
- `labels`: classes in the order of the model outputs. Defaults to the digits `0:9`,
  which is what this function previously hard-coded.
"""
function predictOutcome(X; labels=0:9)
    # The range is passed as-is: `onecold` only indexes into `labels`, so there is
    # no need to materialise it as a vector first.
    return Flux.onecold(model(X), labels)
end

# Predictions for the observations currently bound to `X` (the training predictors).
ŷ = predictOutcome(X);


In [13]:
# Training-set metrics: `ŷ` was predicted from the training predictors and is compared
# with `trainY`. Both vectors are coerced to OrderedFactor before scoring.
# NOTE(review): the held-out `testX` / `validationX` sets are not evaluated in the code
# shown here — confirm whether that evaluation is still to be added.
printMetrics( coerce(ŷ, OrderedFactor), coerce(trainY, OrderedFactor) )

10×10 Matrix{Int64}:
 4056     2    32    20    12    79    30    18    46    35
    2  4585    32    25    23    22    21    38   137    16
    8    23  3706    97    19    21    14    40    49    13
    5    11    64  3817     6   130     1     9    86    73
    8     3    71    16  3772    59    26    66    56   160
   16    23    16   104     4  3207    56     5    99    37
   22     7    60    31    33    94  3961     6    39     3
    4    16    70    52     7    25     4  4078    17   122
   18    41   100    77    41   104    25    13  3489    39
    7     8    20    53   172    54     5   113    78  3666

accuracy: 0.913
f1-score: 0.911
