# PCA + Standardizer with MNIST


## Libraries

In [1]:
# libraries
#using CUDA

using Flux              # the julia ml library
using Images            # image processing and machine vision for julia

using MLJ               # make_blobs, rmse, confmat, f1score, coerce
#using MLJFlux           # NeuralNetworkClassifier, CUDALibs
using MLDataUtils       # label, nlabel, labelfreq, stratifiedobs
using MLDatasets        # mnist

#using LinearAlgebra     # pinv pseudo-inverse matrix
#using Metrics           # r2-score
using Random
#using StatsBase         # standardize (normalization)
#using Distributions

using Plots; gr()
using StatsPlots
using Printf

using CSV
using DataFrames


## Functions

In [2]:
image2Vector(M) = vec( Float32.(M) )   # 32-bits is faster on GPU

function batchImage2Vector(imagesArray3D)
    h, v, N = size(imagesArray3D)
    vectorOfImageVectors = [ image2Vector( imagesArray3D[:, :, i] ) for i in 1:N]
end

function batchImage2Matrix(imagesArray3D)
    vectorOfImageVectors = batchImage2Vector(imagesArray3D)
    M = reduce(hcat, vectorOfImageVectors)
    M'
end

function batchImage2DF(imagesArray3D)
    M = batchImage2Matrix(imagesArray3D)
    DataFrame(M, :auto)
end


batchImage2DF (generic function with 1 method)

In [3]:
# metrics
function printMetrics(ŷ, y)
    display(confmat(ŷ, y))
    println("accuracy: ", round(accuracy(ŷ, y); digits=3))
    println("f1-score: ", round(multiclass_f1score(ŷ, y); digits=3))
end


printMetrics (generic function with 1 method)

## Dataset

In [4]:
# load mnist from MLDatasets
trainX_original,      trainY_original      = MNIST.traindata()
validationX_original, validationY_original = MNIST.testdata();


In [5]:
# split trainset, testset, validation set
Random.seed!(1)
(trainX, trainY), (testX, testY) = stratifiedobs((trainX_original, trainY_original), p = 0.7)
validationX = copy(validationX_original); validationY = copy(validationY_original)

size(trainX), size(testX), size(validationX)

((28, 28, 42001), (28, 28, 17999), (28, 28, 10000))

## Preprocessing


In [6]:
function preprocess(X, y)
    newX = batchImage2DF(X)
    #coerce!(newX)   # no need, all scitypes are Continuous in this example
    new_y = coerce(y, OrderedFactor)
    
    return (newX, new_y)
end

X, y = preprocess(trainX, trainY);

In [7]:
scitype(X)

Table{AbstractVector{Continuous}}

In [8]:
scitype(y)

AbstractVector{OrderedFactor{10}} (alias for AbstractArray{OrderedFactor{10}, 1})

## Reduce dimensions and standardize

In [9]:
models("SVMClass")[1]

[35mC-Support Vector Classification.[39m
[35m→ based on [ScikitLearn](https://github.com/cstjean/ScikitLearn.jl).[39m
[35m→ do `@load SVMClassifier pkg="ScikitLearn"` to use the model.[39m
[35m→ do `?SVMClassifier` for documentation.[39m
(name = "SVMClassifier",
 package_name = "ScikitLearn",
 is_supervised = true,
 abstract_type = Deterministic,
 deep_properties = (),
 docstring =
     """
     C-Support Vector Classification.
     → based on [ScikitLearn](https://github.com/cstjean/ScikitLearn.jl).
     → do `@load SVMClassifier pkg="ScikitLearn"` to use the model.
     → do `?SVMClassifier` for documentation.""",
 fit_data_scitype =
     Tuple{Table{_s28} where _s28<:(AbstractVector{_s29} where _s29<:Continuous), AbstractVector{_s1162} where _s1162<:Finite},
 hyperparameter_ranges = (nothing,
                          nothing,
                          nothing,
                          nothing,
                          nothing,
                          nothing,
          

In [14]:
# reduce predictors
PCA = @load PCA pkg=MultivariateStats verbosity=0
reducer = PCA(pratio = 0.9)

# standardize predictors
std = Standardizer()

# svm scikitlearn
SVMC = @load SVMClassifier pkg="ScikitLearn"
svm = SVMC()


import MLJScikitLearnInterface ✔


┌ Info: For silent loading, specify `verbosity=0`. 
└ @ Main /home/ciro/.julia/packages/MLJModels/EhaRK/src/loading.jl:168


SVMClassifier(
    C = 1.0,
    kernel = "rbf",
    degree = 3,
    gamma = "auto",
    coef0 = 0.0,
    shrinking = true,
    tol = 0.001,
    cache_size = 200,
    max_iter = -1,
    decision_function_shape = "ovr",
    random_state = nothing)

In [36]:
#pipe = @pipeline reducer std svm
pipe = reducer |> std |> (X -> Matrix(X)) |> svm   # (X -> Matrix(X)) |>


DeterministicPipeline(
    pca = PCA(
            maxoutdim = 0,
            method = :auto,
            pratio = 0.9,
            mean = nothing),
    standardizer = Standardizer(
            features = Symbol[],
            ignore = false,
            ordered_factor = false,
            count = false),
    f = var"#42#43"(),
    svm_classifier = SVMClassifier(
            C = 1.0,
            kernel = "rbf",
            degree = 3,
            gamma = "auto",
            coef0 = 0.0,
            shrinking = true,
            tol = 0.001,
            cache_size = 200,
            max_iter = -1,
            decision_function_shape = "ovr",
            random_state = nothing),
    cache = true)

In [25]:
mach = MLJ.machine(pipe, X, y) |> fit!


┌ Info: Training Machine{DeterministicPipeline{NamedTuple{,…},…},…}.
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:464
┌ Info: Training Machine{PCA,…}.
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:464
┌ Info: Training Machine{Standardizer,…}.
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:464
┌ Error: Problem fitting Machine{SVMClassifier,…}
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:684
┌ Error: Problem fitting the machine Machine{DeterministicPipeline{NamedTuple{,…},…},…}. 
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:594
┌ Info: Running type checks... 
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:600
┌ Info: Type checks okay. 
└ @ MLJBase /home/ciro/.julia/packages/MLJBase/pCiRR/src/machines.jl:603


LoadError: TaskFailedException

[91m    nested task error: [39mtype Nothing has no field model
    Stacktrace:
     [1] [0m[1mfit_only![22m[0m[1m([22m[90mmach[39m::[0mMachine[90m{MLJScikitLearnInterface.SVMClassifier, true}[39m, [90mwait_on_downstream[39m::[0mBool; [90mkwargs[39m::[0mBase.Iterators.Pairs[90m{Symbol, Int64, Tuple{Symbol}, NamedTuple{(:verbosity,), Tuple{Int64}}}[39m[0m[1m)[22m
    [90m   @ [39m[35mMLJBase[39m [90m~/.julia/packages/MLJBase/pCiRR/src/[39m[90;4mmachines.jl:685[0m
     [2] [0m[1m(::MLJBase.var"#67#69"{Base.Iterators.Pairs{Symbol, Int64, Tuple{Symbol}, NamedTuple{(:verbosity,), Tuple{Int64}}}, Machine{MLJScikitLearnInterface.SVMClassifier, true}})[22m[0m[1m([22m[0m[1m)[22m
    [90m   @ [39m[35mMLJBase[39m [90m./[39m[90;4mtask.jl:411[0m
    
    [91mcaused by: [39mtype Nothing has no field model
    Stacktrace:
      [1] [0m[1mgetproperty[22m[0m[1m([22m[90mx[39m::[0mNothing, [90mf[39m::[0mSymbol[0m[1m)[22m
    [90m    @ [39m[90mBase[39m [90m./[39m[90;4mBase.jl:33[0m
      [2] [0m[1mdiagnostics[22m[0m[1m([22m::[0mNode[90m{Nothing}[39m; [90mkwargs[39m::[0mBase.Iterators.Pairs[90m{Symbol, Colon, Tuple{Symbol}, NamedTuple{(:rows,), Tuple{Colon}}}[39m[0m[1m)[22m
    [90m    @ [39m[35mMLJBase[39m [90m~/.julia/packages/MLJBase/pCiRR/src/[39m[90;4msources.jl:79[0m
      [3] [0m[1mmacro expansion[22m
    [90m    @ [39m[90m./[39m[90;4mlogging.jl:340[0m[90m [inlined][39m
      [4] [0m[1m_apply[22m[0m[1m([22m::[0mTuple[90m{Node{Nothing}}[39m; [90mkwargs[39m::[0mBase.Iterators.Pairs[90m{Symbol, Colon, Tuple{Symbol}, NamedTuple{(:rows,), Tuple{Colon}}}[39m[0m[1m)[22m
    [90m    @ [39m[35mMLJBase[39m [90m~/.julia/packages/MLJBase/pCiRR/src/composition/learning_networks/[39m[90;4mnodes.jl:126[0m
      [5] [0m[1m#_#59[22m
    [90m    @ [39m[90m~/.julia/packages/MLJBase/pCiRR/src/composition/learning_networks/[39m[90;4mnodes.jl:114[0m[90m [inlined][39m
      [6] [0m[1mNode[22m
    [90m    @ [39m[90m~/.julia/packages/MLJBase/pCiRR/src/composition/learning_networks/[39m[90;4mnodes.jl:114[0m[90m [inlined][39m
      [7] [0m[1m#52[22m
    [90m    @ [39m[90m~/.julia/packages/MLJBase/pCiRR/src/[39m[90;4mmachines.jl:573[0m[90m [inlined][39m
      [8] [0m[1mmap[22m[0m[1m([22m[90mf[39m::[0mMLJBase.var"#52#54", [90mt[39m::[0mTuple[90m{Node{Nothing}, MLJBase.Source}[39m[0m[1m)[22m
    [90m    @ [39m[90mBase[39m [90m./[39m[90;4mtuple.jl:214[0m
      [9] [0m[1mfit_only![22m[0m[1m([22m[90mmach[39m::[0mMachine[90m{MLJScikitLearnInterface.SVMClassifier, true}[39m; [90mrows[39m::[0mNothing, [90mverbosity[39m::[0mInt64, [90mforce[39m::[0mBool[0m[1m)[22m
    [90m    @ [39m[35mMLJBase[39m [90m~/.julia/packages/MLJBase/pCiRR/src/[39m[90;4mmachines.jl:573[0m
     [10] [0m[1mfit_only![22m[0m[1m([22m[90mmach[39m::[0mMachine[90m{MLJScikitLearnInterface.SVMClassifier, true}[39m, [90mwait_on_downstream[39m::[0mBool; [90mkwargs[39m::[0mBase.Iterators.Pairs[90m{Symbol, Int64, Tuple{Symbol}, NamedTuple{(:verbosity,), Tuple{Int64}}}[39m[0m[1m)[22m
    [90m    @ [39m[35mMLJBase[39m [90m~/.julia/packages/MLJBase/pCiRR/src/[39m[90;4mmachines.jl:681[0m
     [11] [0m[1m(::MLJBase.var"#67#69"{Base.Iterators.Pairs{Symbol, Int64, Tuple{Symbol}, NamedTuple{(:verbosity,), Tuple{Int64}}}, Machine{MLJScikitLearnInterface.SVMClassifier, true}})[22m[0m[1m([22m[0m[1m)[22m
    [90m    @ [39m[35mMLJBase[39m [90m./[39m[90;4mtask.jl:411[0m
    
    [91mcaused by: [39mMethodError: objects of type Matrix{Float32} are not callable
    Use square brackets [] for indexing an Array.
    Stacktrace:
     [1] [0m[1m_apply[22m[0m[1m([22m::[0mTuple[90m{Node{Nothing}}[39m; [90mkwargs[39m::[0mBase.Iterators.Pairs[90m{Symbol, Colon, Tuple{Symbol}, NamedTuple{(:rows,), Tuple{Colon}}}[39m[0m[1m)[22m
    [90m   @ [39m[35mMLJBase[39m [90m~/.julia/packages/MLJBase/pCiRR/src/composition/learning_networks/[39m[90;4mnodes.jl:124[0m
     [2] [0m[1m#_#59[22m
    [90m   @ [39m[90m~/.julia/packages/MLJBase/pCiRR/src/composition/learning_networks/[39m[90;4mnodes.jl:114[0m[90m [inlined][39m
     [3] [0m[1mNode[22m
    [90m   @ [39m[90m~/.julia/packages/MLJBase/pCiRR/src/composition/learning_networks/[39m[90;4mnodes.jl:114[0m[90m [inlined][39m
     [4] [0m[1m#52[22m
    [90m   @ [39m[90m~/.julia/packages/MLJBase/pCiRR/src/[39m[90;4mmachines.jl:573[0m[90m [inlined][39m
     [5] [0m[1mmap[22m[0m[1m([22m[90mf[39m::[0mMLJBase.var"#52#54", [90mt[39m::[0mTuple[90m{Node{Nothing}, MLJBase.Source}[39m[0m[1m)[22m
    [90m   @ [39m[90mBase[39m [90m./[39m[90;4mtuple.jl:214[0m
     [6] [0m[1mfit_only![22m[0m[1m([22m[90mmach[39m::[0mMachine[90m{MLJScikitLearnInterface.SVMClassifier, true}[39m; [90mrows[39m::[0mNothing, [90mverbosity[39m::[0mInt64, [90mforce[39m::[0mBool[0m[1m)[22m
    [90m   @ [39m[35mMLJBase[39m [90m~/.julia/packages/MLJBase/pCiRR/src/[39m[90;4mmachines.jl:573[0m
     [7] [0m[1mfit_only![22m[0m[1m([22m[90mmach[39m::[0mMachine[90m{MLJScikitLearnInterface.SVMClassifier, true}[39m, [90mwait_on_downstream[39m::[0mBool; [90mkwargs[39m::[0mBase.Iterators.Pairs[90m{Symbol, Int64, Tuple{Symbol}, NamedTuple{(:verbosity,), Tuple{Int64}}}[39m[0m[1m)[22m
    [90m   @ [39m[35mMLJBase[39m [90m~/.julia/packages/MLJBase/pCiRR/src/[39m[90;4mmachines.jl:681[0m
     [8] [0m[1m(::MLJBase.var"#67#69"{Base.Iterators.Pairs{Symbol, Int64, Tuple{Symbol}, NamedTuple{(:verbosity,), Tuple{Int64}}}, Machine{MLJScikitLearnInterface.SVMClassifier, true}})[22m[0m[1m([22m[0m[1m)[22m
    [90m   @ [39m[35mMLJBase[39m [90m./[39m[90;4mtask.jl:411[0m

In [23]:
ŷ = MLJ.fit(mach, X)
first(ŷ, 5)


LoadError: MethodError: no method matching fit(::Machine{MLJBase.DeterministicPipeline{NamedTuple{(:pca, :standardizer, :svm_classifier), Tuple{Unsupervised, Unsupervised, Deterministic}}, MLJModelInterface.predict}, true}, ::DataFrame)
[0mClosest candidates are:
[0m  fit([91m::Union{MLJBase.TransformedTargetModelDeterministic, MLJBase.TransformedTargetModelDeterministicSupervisedDetector, MLJBase.TransformedTargetModelDeterministicUnsupervisedDetector, MLJBase.TransformedTargetModelInterval, MLJBase.TransformedTargetModelProbabilistic, MLJBase.TransformedTargetModelProbabilisticSupervisedDetector, MLJBase.TransformedTargetModelProbabilisticUnsupervisedDetector}[39m, ::Any, [91m::Any[39m, [91m::Any[39m, [91m::Any...[39m) at /home/ciro/.julia/packages/MLJBase/pCiRR/src/composition/models/transformed_target_model.jl:177
[0m  fit([91m::Union{MLJIteration.DeterministicIteratedModel{M}, MLJIteration.ProbabilisticIteratedModel{M}} where M[39m, ::Any, [91m::Any...[39m) at /home/ciro/.julia/packages/MLJIteration/Wc51L/src/core.jl:77
[0m  fit([91m::UnsupervisedAnnotator[39m, ::Any, [91m::Any[39m, [91m::Any[39m) at /home/ciro/.julia/packages/MLJModelInterface/GKiZ2/src/model_api.jl:21
[0m  ...