In [1]:
using Statistics
using MLJBase, CategoricalArrays
using MLJ

# We use flux only to get the MNIST
using Flux, Flux.Data.MNIST

#push!(LOAD_PATH, "../src/") ## Uncomment if MulticlassPerceptron not installed
using MulticlassPerceptron


┌ Info: CUDAdrv.jl failed to initialize, GPU functionality unavailable (set JULIA_CUDA_SILENT or JULIA_CUDA_VERBOSE to silence or expand this message)
└ @ CUDAdrv /Users/david/.julia/packages/CUDAdrv/3EzC1/src/CUDAdrv.jl:69


## Loading the data



In [2]:
function load_MNIST( ;array_eltype::DataType=Float32, verbose::Bool=true)

    if verbose
        time_init = time()
        println("\nMNIST Dataset Loading...")
    end
    train_imgs = MNIST.images(:train)                             # size(train_imgs) -> (60000,)
    test_imgs  = MNIST.images(:test)                              # size(test_imgs)  -> (10000,)
    train_x    = array_eltype.(hcat(reshape.(train_imgs, :)...))  # size(train_x)    -> (784, 60000)
    test_x     = array_eltype.(hcat(reshape.(test_imgs, :)...))   # size(test_x)     -> (784, 60000)

    ## Prepare data
    train_y = MNIST.labels(:train) .+ 1;
    test_y  = MNIST.labels(:test)  .+ 1;

    ## Encode targets as CategoricalArray objects
    train_y = CategoricalArray(train_y)
    test_y  = CategoricalArray(test_y)

    if verbose
        time_taken = round(time()-time_init; digits=3)
        println("\nMNIST Dataset Loaded, it took $time_taken seconds")
    end
    return train_x, train_y, test_x, test_y
end

println("\nLoading data\n")
train_x, train_y, test_x, test_y = load_MNIST( ;array_eltype=Float32, verbose=true)



Loading data


MNIST Dataset Loading...

MNIST Dataset Loaded, it took 1.05 seconds


(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], CategoricalValue{Int64,UInt32}[6, 1, 5, 2, 10, 3, 2, 4, 2, 5  …  10, 3, 10, 6, 2, 9, 4, 6, 7, 9], Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], CategoricalValue{Int64,UInt32}[8, 3, 2, 1, 5, 2, 5, 10, 6, 10  …  8, 9, 10, 1, 2, 3, 4, 5, 6, 7])

In [3]:
## Define model and train it
n_features = size(train_x, 1);
n_classes  = length(unique(train_y));
y = MNIST.labels(:train) .+ 1;

## MulticlassPerceptronCore Object

The simplest way to train a MulticlassPerceptron is using the `MulticlassPerceptronCore` struct that simply stores the basic information of the model.

```julia
mutable struct MulticlassPerceptronCore{T}
    W::AbstractMatrix{T}
    b::AbstractVector{T}
    n_classes::Int
    n_features::Int
    is_sparse::Bool
end
```

In [4]:
is_sparse = false
perceptron = MulticlassPerceptronCore(Float32, n_classes, n_features, is_sparse) 

MulticlassPerceptronCore{Float32}(Float32[0.41853082 0.76888824 … 0.49009085 0.2114991; 0.25780487 0.95415425 … 0.45526063 0.25016534; … ; 0.67237735 0.21411085 … 0.18651879 0.029226422; 0.713514 0.9978076 … 0.2012527 0.6974728], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 10, 784, false)

In [5]:
fit!(perceptron,
     train_x,
     y;
     verbosity=1,
     n_epochs=50,
     f_average_weights=true)



UndefVarError: UndefVarError: fit! not defined

In [6]:
y_hat_train = MulticlassPerceptron.predict(perceptron, train_x)
y_hat_test  = MulticlassPerceptron.predict(perceptron, test_x);

## Evaluate the model
println("Results:")
println("Train accuracy:", mean(y_hat_train .== train_y))
println("Test accuracy:",  mean(y_hat_test  .== test_y))
println("\n")

Results:
Train accuracy:0.15008333333333335
Test accuracy:0.1394




## MulticlassPerceptronClassifier Object

The `MulticlassPerceptronClassifier` is the basic object build to take advantage of MLJ capabilities.

We can use `fit` with a CategoricalArray of the target (which can have any values, it is not restricted to values from 1 to the number of classes). 

In [7]:
## Define model and train it
n_features = size(train_x, 1);
n_classes  = length(unique(train_y));
perceptron = MulticlassPerceptronClassifier(n_epochs=50; f_average_weights=true)

MulticlassPerceptronClassifier(n_epochs = 50,
                               n_epoch_patience = 5,
                               f_average_weights = true,
                               f_shuffle_data = false,
                               element_type = Float32,)[34m @ 3…73[39m

Training a `MulticlassPerceptronClassifier`

In [8]:
## Train the model
println("\nStart Learning\n")
time_init = time()
fitresult, _ , _  = fit(perceptron, 1, train_x, train_y) #
time_taken = round(time()-time_init; digits=3)


Start Learning

[KEpoch: 50 	 Accuracy: 0.898

15.098

Make predictions

In [9]:
println("\nLearning took $time_taken seconds\n")

## Make predictions
y_hat_train = MLJBase.predict(fitresult, train_x)
y_hat_test  = MLJBase.predict(fitresult, test_x);

## Evaluate the model
println("Results:")
println("Train accuracy:", mean(y_hat_train .== train_y))
println("Test accuracy:",  mean(y_hat_test  .== test_y))
println("\n")


Learning took 15.098 seconds

Results:
Train accuracy:0.9357666666666666
Test accuracy:0.9263




## Machine with `MulticlassPerceptronClassifier`



In [10]:
size(train_x)

(784, 60000)

In [11]:
train_x_rowexamples = MLJBase.table(train_x')  
train_x_rowexamples

Tables.MatrixTable{LinearAlgebra.Adjoint{Float32,Array{Float32,2}}}(Symbol[:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10  …  :x775, :x776, :x777, :x778, :x779, :x780, :x781, :x782, :x783, :x784], Dict(:x753 => 753,:x620 => 620,:x233 => 233,:x71 => 71,:x110 => 110,:x685 => 685,:x348 => 348,:x630 => 630,:x539 => 539,:x608 => 608…), Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0])

In [12]:
perceptron = MulticlassPerceptronClassifier(n_epochs=50; f_average_weights=true)

MulticlassPerceptronClassifier(n_epochs = 50,
                               n_epoch_patience = 5,
                               f_average_weights = true,
                               f_shuffle_data = false,
                               element_type = Float32,)[34m @ 1…46[39m

In [13]:
# machines expert Tables.Table or DataFrame objects, not AbstractArrays
perceptron_machine = machine(perceptron, train_x_rowexamples, train_y)  

[34mMachine{MulticlassPerceptronClassifier} @ 3…26[39m


In [14]:
## Train the model
println("\nStart Learning\n")
time_init = time()
#fitresult, _ , _  = MLJBase.fit(perceptron, 1, train_x, train_y) # If train_y is a CategoricalArray
MLJBase.fit!(perceptron_machine)
time_taken = round(time()-time_init; digits=3)
println("\nLearning took $time_taken seconds\n")


Start Learning



┌ Info: Training [34mMachine{MulticlassPerceptronClassifier} @ 3…26[39m.
└ @ MLJ /Users/david/.julia/packages/MLJ/mxD3X/src/machines.jl:172


[KEpoch: 50 	 Accuracy: 0.898
Learning took 29.612 seconds



In [15]:
## Make predictions
y_hat_train = MLJBase.predict(perceptron_machine, train_x)
y_hat_test  = MLJBase.predict(perceptron_machine, test_x);

## Evaluate the model
println("Results:")
println("Train accuracy:", mean(y_hat_train .== train_y))
println("Test accuracy:",  mean(y_hat_test  .== test_y))
println("\n")

Results:
Train accuracy:0.9357833333333333
Test accuracy:0.927




## EnsembleModel

MLJ offers basic support for ensembling techniques such as bagging. 

For the MulticlassPerceptorn as is, it does not make a lot of sense to do bagging, but for other estimators it is a pretty interesting technique.

In order to define an Ensemble we need an "atomic" model.

In [16]:
using MLJ

In [17]:
using Tables

In [18]:
train_x_table = Tables.table(copy(train_x'))

Tables.MatrixTable{Array{Float32,2}}(Symbol[:Column1, :Column2, :Column3, :Column4, :Column5, :Column6, :Column7, :Column8, :Column9, :Column10  …  :Column775, :Column776, :Column777, :Column778, :Column779, :Column780, :Column781, :Column782, :Column783, :Column784], Dict(:Column42 => 42,:Column285 => 285,:Column714 => 714,:Column630 => 630,:Column167 => 167,:Column607 => 607,:Column693 => 693,:Column499 => 499,:Column52 => 52,:Column647 => 647…), Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0])

In [20]:
target_scitype(MulticlassPerceptronClassifier)

AbstractArray{#s26,1} where #s26<:Finite

In [21]:
model        = MulticlassPerceptronClassifier()

MulticlassPerceptronClassifier(n_epochs = 100,
                               n_epoch_patience = 5,
                               f_average_weights = true,
                               f_shuffle_data = false,
                               element_type = Float32,)[34m @ 4…43[39m

In [22]:
ensemble_model   = EnsembleModel(atom=model, n=20)

MLJ.DeterministicEnsembleModel(atom = MulticlassPerceptronClassifier(n_epochs = 100,
                                                                     n_epoch_patience = 5,
                                                                     f_average_weights = true,
                                                                     f_shuffle_data = false,
                                                                     element_type = Float32,),
                               atomic_weights = Float64[],
                               bagging_fraction = 0.8,
                               rng = Random._GLOBAL_RNG(),
                               n = 20,
                               acceleration = ComputationalResources.CPU1{Nothing}(nothing),
                               out_of_bag_measure = Any[],)[34m @ 1…88[39m

In [23]:
@time ensemble_machine = machine(ensemble_model, train_x_table, train_y)

340.908737 seconds (5.08 M allocations: 929.223 MiB, 0.04% gc time)


[34mMachine{DeterministicEnsembleModel{MulticlassPerceptronClassifier}} @ 1…08[39m


In [24]:
train_x_table

Tables.MatrixTable{Array{Float32,2}}(Symbol[:Column1, :Column2, :Column3, :Column4, :Column5, :Column6, :Column7, :Column8, :Column9, :Column10  …  :Column775, :Column776, :Column777, :Column778, :Column779, :Column780, :Column781, :Column782, :Column783, :Column784], Dict(:Column42 => 42,:Column285 => 285,:Column714 => 714,:Column630 => 630,:Column167 => 167,:Column607 => 607,:Column693 => 693,:Column499 => 499,:Column52 => 52,:Column647 => 647…), Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0])

In [25]:
MLJ.fit!(ensemble_machine)

┌ Info: Training [34mMachine{DeterministicEnsembleModel{MulticlassPerceptronClassifier}} @ 1…08[39m.
└ @ MLJ /Users/david/.julia/packages/MLJ/mxD3X/src/machines.jl:172





[34mMachine{DeterministicEnsembleModel{MulticlassPerceptronClassifier}} @ 1…08[39m


In [26]:
y_hat = MLJ.predict(ensemble_machine,train_x_table);
print("accuracy:  $(mean(y_hat .== y))")

accuracy:  0.9410666666666667

In [27]:
test_x_table = Tables.table(copy(test_x'))
y_hat_test   = MLJ.predict(ensemble_machine,test_x_table);
print("accuracy:  $(mean(y_hat_test .== test_y))")

accuracy:  0.9282

Notice that an ensemble with bagging is simply a bunch of models which are used to make the final prediction (by simply averaging the predictions of the different models)

In [28]:
ensemble_machine.fitresult.ensemble

20-element Array{Tuple{MulticlassPerceptronCore{Float32},MLJBase.CategoricalDecoder{Int64,UInt32}},1}:
 (MulticlassPerceptronCore{Float32}(Float32[0.55092514 0.6111343 … 0.20649719 0.4677608; 0.81517005 0.14493835 … 0.39646065 0.5024731; … ; 0.5304966 0.53643095 … 0.41094077 0.31922877; 0.396919 0.7265923 … 0.28136563 0.6685412], Float32[-54.50738, 38.859306, 9.523631, -27.911634, 15.523544, 93.374954, -33.902954, 57.125854, -88.17588, -9.909387], 10, 784, false), MLJBase.CategoricalDecoder{Int64,UInt32}(CategoricalPool{Int64,UInt32}([1,2,3,4,5,6,7,8,9,10]), [2, 4, 6, 7, 3, 1, 8, 9, 10, 5]))       
 (MulticlassPerceptronCore{Float32}(Float32[0.57465625 0.8450837 … 0.43707108 0.016412616; 0.9990845 0.9994185 … 0.9308469 0.45611954; … ; 0.44581306 0.23577201 … 0.09314263 0.5423784; 0.6474056 0.4340917 … 0.22253466 0.4615097], Float32[-74.36113, 44.65009, 20.575394, -16.168709, 17.02769, 87.705795, -28.41301, 54.46647, -92.763016, -12.71936], 10, 784, false), MLJBase.CategoricalDecoder{In

We can see that bagging does not improve a lot the results for the MulticlassPerceptron

In [29]:
p = MulticlassPerceptronClassifier(n_epochs=50; f_average_weights=true)
fitresult, _ , _  = fit(p, 1, train_x, train_y);
y_test_preds = MLJBase.predict(fitresult, test_x);
mean(y_test_preds .== test_y)

[KEpoch: 50 	 Accuracy: 0.898

0.9266

### Composing Models