In [1]:
using Statistics

using CategoricalArrays
using MLJ
#using MLJBase

using MLJModelInterface

# Flux is used only to obtain the MNIST dataset
using Flux, Flux.Data.MNIST


In [2]:
#push!(LOAD_PATH, "../src/") ## Uncomment if MulticlassPerceptron not installed
using MulticlassPerceptron

## Loading the data



In [3]:
"""
    load_MNIST(; array_eltype::DataType=Float32, verbose::Bool=true)

Load the MNIST train and test splits via `Flux.Data.MNIST` and return
`(train_x, train_y, test_x, test_y)`.

# Keywords
- `array_eltype`: element type the pixel values are converted to.
- `verbose`: when `true`, print progress messages and the elapsed loading time.

# Returns
- `train_x`, `test_x`: matrices with one flattened image per column
  (784 rows; 60000 and 10000 columns respectively).
- `train_y`, `test_y`: `CategoricalArray`s of the labels shifted by one, so the
  digits 0–9 are encoded as the values 1–10.
"""
function load_MNIST(; array_eltype::DataType=Float32, verbose::Bool=true)
    time_init = time()
    verbose && println("\nMNIST Dataset Loading...")

    train_imgs = MNIST.images(:train)   # size(train_imgs) -> (60000,)
    test_imgs  = MNIST.images(:test)    # size(test_imgs)  -> (10000,)

    # Flatten every image into a vector and stack the vectors as columns.
    # `reduce(hcat, …)` avoids splatting tens of thousands of arrays into a
    # single varargs call, which is what `hcat(xs...)` would do.
    train_x = array_eltype.(reduce(hcat, reshape.(train_imgs, :)))  # size(train_x) -> (784, 60000)
    test_x  = array_eltype.(reduce(hcat, reshape.(test_imgs, :)))   # size(test_x)  -> (784, 10000)

    # Shift labels by one so that classes are numbered starting at 1,
    # then encode the targets as CategoricalArray objects.
    train_y = CategoricalArray(MNIST.labels(:train) .+ 1)
    test_y  = CategoricalArray(MNIST.labels(:test) .+ 1)

    if verbose
        time_taken = round(time() - time_init; digits=3)
        println("\nMNIST Dataset Loaded, it took $time_taken seconds")
    end
    return train_x, train_y, test_x, test_y
end

# Load both MNIST splits with Float32 pixel values. Being the last expression
# of the cell, the returned 4-tuple is also echoed as the cell output.
println("\nLoading data\n")
train_x, train_y, test_x, test_y = load_MNIST( ;array_eltype=Float32, verbose=true)



Loading data


MNIST Dataset Loading...

MNIST Dataset Loaded, it took 0.551 seconds


(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], CategoricalValue{Int64,UInt32}[6, 1, 5, 2, 10, 3, 2, 4, 2, 5  …  10, 3, 10, 6, 2, 9, 4, 6, 7, 9], Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], CategoricalValue{Int64,UInt32}[8, 3, 2, 1, 5, 2, 5, 10, 6, 10  …  8, 9, 10, 1, 2, 3, 4, 5, 6, 7])

In [4]:
## Problem dimensions and raw labels (no model is built or trained in this cell)
# Features are indexed along the first dimension of `train_x`.
n_features = size(train_x, 1);
# Number of distinct label values present in the training targets.
n_classes  = length(unique(train_y));
# Plain integer labels shifted by one (not wrapped in a CategoricalArray).
y = MNIST.labels(:train) .+ 1;

## MulticlassPerceptronCore Object

The simplest way to train a multiclass perceptron is to use the `MulticlassPerceptronCore` struct, which only stores the basic information of the model.

```julia
mutable struct MulticlassPerceptronCore{T}
    W::AbstractMatrix{T}
    b::AbstractVector{T}
    n_classes::Int
    n_features::Int
    is_sparse::Bool
end
```

In [5]:
# Construct a `MulticlassPerceptronCore` with Float32 parameters for the given
# number of classes and features.
# NOTE(review): `is_sparse = false` presumably selects dense weight storage — confirm in the package.
is_sparse = false
perceptron = MulticlassPerceptronCore(Float32, n_classes, n_features, is_sparse) 

MulticlassPerceptronCore{Float32}(Float32[0.2217648 0.6161314 … 0.6663526 0.8924842; 0.8276682 0.47969973 … 0.6944289 0.468629; … ; 0.08168745 0.7302754 … 0.72023857 0.85164917; 0.55826354 0.49479234 … 0.6552303 0.62207437], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 10, 784, false)

In [6]:
# Train the core model in place on the feature matrix and the raw integer
# labels `y`, for at most 50 epochs.
# NOTE(review): `f_average_weights=true` presumably enables weight averaging
# during training — confirm against the package documentation.
fit!(perceptron,
     train_x,
     y;
     verbosity=1,
     n_epochs=50,
     f_average_weights=true)

10-element Array{Float32,1}:
 -56.73323
  35.082172
  17.95843
 -21.79905
   5.1034603
 102.07189
 -32.864384
  54.587036
 -91.14038
 -12.265953

In [7]:
# Predict labels for both splits with the trained core perceptron.
y_hat_train = predict(perceptron, train_x)
y_hat_test  = predict(perceptron, test_x)

# Report, for each split, the fraction of predictions that match the reference labels.
println("Results:")
for (caption, predicted, reference) in (
    ("Train accuracy:", y_hat_train, train_y),
    ("Test accuracy:", y_hat_test, test_y),
)
    println(caption, mean(predicted .== reference))
end
println("\n")

Results:
Train accuracy:0.93585
Test accuracy:0.926




In [8]:
#MulticlassPerceptron.predict(perceptron,train_x)

## MulticlassPerceptronClassifier Object

The `MulticlassPerceptronClassifier` is the basic object built to take advantage of MLJ capabilities.

We can use `fit` with a CategoricalArray of the target (which can have any values, it is not restricted to values from 1 to the number of classes). 

In [9]:
# Recompute the data dimensions, then build the MLJ-facing classifier.
# The constructed model is the last expression, so the notebook displays it.
n_features, n_classes = size(train_x, 1), length(unique(train_y))
perceptron = MulticlassPerceptronClassifier(; n_epochs=50, f_average_weights=true)

MulticlassPerceptronClassifier(
    n_epochs = 50,
    n_epoch_patience = 5,
    f_average_weights = true,
    f_shuffle_data = false,
    element_type = Float32)[34m @ 7…09[39m

Training a `MulticlassPerceptronClassifier`

In [10]:
## Train the model
# Wall-clock timing around a direct call to `fit`; the elapsed time (rounded to
# milliseconds) is the last expression and is therefore shown as the cell output.
println("\nStart Learning\n")
time_init = time()
fitresult, _ , _  = fit(perceptron, 1, train_x, train_y) # only the first returned element is kept; the `1` is presumably the verbosity level — confirm
time_taken = round(time()-time_init; digits=3)


Start Learning



6.992

Make predictions

In [11]:
println("\nLearning took $time_taken seconds\n")

# Predict labels for both splits from the fit result obtained above.
y_hat_train = predict(fitresult, train_x)
y_hat_test  = predict(fitresult, test_x)

# Report, for each split, the fraction of predictions that match the reference labels.
println("Results:")
for (caption, predicted, reference) in (
    ("Train accuracy:", y_hat_train, train_y),
    ("Test accuracy:", y_hat_test, test_y),
)
    println(caption, mean(predicted .== reference))
end
println("\n")


Learning took 6.992 seconds

Results:
Train accuracy:0.93565
Test accuracy:0.9264




## Machine with `MulticlassPerceptronClassifier`



In [12]:
using MLJ

In [13]:
# Inspect the layout of the training matrix: (first dimension, second dimension).
size(train_x)

(784, 60000)

In [14]:
# Wrap the transposed feature matrix as a table. The assignment is the last
# expression of the cell, so the resulting table is displayed.
train_x_rowexamples = MLJ.table(train_x')

Tables.MatrixTable{LinearAlgebra.Adjoint{Float32,Array{Float32,2}}}([:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10  …  :x775, :x776, :x777, :x778, :x779, :x780, :x781, :x782, :x783, :x784], Dict(:x753 => 753,:x620 => 620,:x233 => 233,:x71 => 71,:x110 => 110,:x685 => 685,:x348 => 348,:x630 => 630,:x539 => 539,:x608 => 608…), Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0])

In [15]:
# Fresh classifier instance to be wrapped in an MLJ machine.
perceptron = MulticlassPerceptronClassifier(; n_epochs=50, f_average_weights=true)

MulticlassPerceptronClassifier(
    n_epochs = 50,
    n_epoch_patience = 5,
    f_average_weights = true,
    f_shuffle_data = false,
    element_type = Float32)[34m @ 5…97[39m

In [16]:
# machines expect Tables.Table or DataFrame objects, not AbstractArrays
# Bind the model to the row-per-example table and the categorical targets.
perceptron_machine = machine(perceptron, train_x_rowexamples, train_y)  

[34mMachine{MulticlassPerceptronClassifier} @ 6…72[39m


In [17]:
## Train the model
# Wall-clock timing around training the machine in place.
println("\nStart Learning\n")
time_init = time()
# Alternative without a machine (if train_y is a CategoricalArray):
#fitresult, _ , _  = MLJBase.fit(perceptron, 1, train_x, train_y)
fit!(perceptron_machine)
time_taken = round(time()-time_init; digits=3)
println("\nLearning took $time_taken seconds\n")


Start Learning



┌ Info: Training [34mMachine{MulticlassPerceptronClassifier} @ 6…72[39m.
└ @ MLJBase /Users/davidbuchaca1/.julia/packages/MLJBase/FFnHt/src/machines.jl:164



Learning took 8.891 seconds



In [18]:
# Predict labels for both splits through the trained machine.
y_hat_train = predict(perceptron_machine, train_x)
y_hat_test  = predict(perceptron_machine, test_x)

# Report, for each split, the fraction of predictions that match the reference labels.
println("Results:")
for (caption, predicted, reference) in (
    ("Train accuracy:", y_hat_train, train_y),
    ("Test accuracy:", y_hat_test, test_y),
)
    println(caption, mean(predicted .== reference))
end
println("\n")

Results:
Train accuracy:0.9356666666666666
Test accuracy:0.9267




## EnsembleModel

MLJ offers basic support for ensembling techniques such as bagging. 

For the MulticlassPerceptron as is, bagging does not make a lot of sense, but for other estimators it is a pretty interesting technique.

In order to define an Ensemble we need an "atomic" model.

In [19]:
using MLJ

In [20]:
using Tables

In [21]:
# Build a table from a materialised copy of the transposed feature matrix,
# i.e. backed by a plain array instead of a lazy adjoint wrapper.
train_x_table = Tables.table(copy(train_x'))

Tables.MatrixTable{Array{Float32,2}}([:Column1, :Column2, :Column3, :Column4, :Column5, :Column6, :Column7, :Column8, :Column9, :Column10  …  :Column775, :Column776, :Column777, :Column778, :Column779, :Column780, :Column781, :Column782, :Column783, :Column784], Dict(:Column42 => 42,:Column285 => 285,:Column714 => 714,:Column630 => 630,:Column167 => 167,:Column607 => 607,:Column693 => 693,:Column499 => 499,:Column52 => 52,:Column647 => 647…), Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0])

In [22]:
# Query the scientific type of the targets this model type declares it accepts.
target_scitype(MulticlassPerceptronClassifier)

AbstractArray{var"#s27",1} where var"#s27"<:Finite

In [23]:
# Atomic model for the ensemble, constructed with all hyperparameters at their defaults.
model        = MulticlassPerceptronClassifier()

MulticlassPerceptronClassifier(
    n_epochs = 100,
    n_epoch_patience = 5,
    f_average_weights = true,
    f_shuffle_data = false,
    element_type = Float32)[34m @ 1…84[39m

In [24]:
# Ensemble of n = 5 atoms built from `model`; every other ensemble option is
# left at its default value.
ensemble_model   = EnsembleModel(atom=model, n=5)

DeterministicEnsembleModel(
    atom = MulticlassPerceptronClassifier(
            n_epochs = 100,
            n_epoch_patience = 5,
            f_average_weights = true,
            f_shuffle_data = false,
            element_type = Float32),
    atomic_weights = Float64[],
    bagging_fraction = 0.8,
    rng = Random._GLOBAL_RNG(),
    n = 5,
    acceleration = ComputationalResources.CPU1{Nothing}(nothing),
    out_of_bag_measure = Any[])[34m @ 1…69[39m

In [25]:
# Bind the ensemble to the training table and targets. `@time` here measures
# only the construction of the machine; training is a separate `fit!` call.
@time ensemble_machine = machine(ensemble_model, train_x_table, train_y)

  0.182134 seconds (330.24 k allocations: 196.131 MiB)


[34mMachine{DeterministicEnsembleModel{MulticlassPerceptronClassifier}} @ 1…10[39m


In [26]:
# Train the ensemble machine in place and report the elapsed time and allocations.
@time fit!(ensemble_machine)

┌ Info: Training [34mMachine{DeterministicEnsembleModel{MulticlassPerceptronClassifier}} @ 1…10[39m.
└ @ MLJBase /Users/davidbuchaca1/.julia/packages/MLJBase/FFnHt/src/machines.jl:164



 62.002520 seconds (156.46 M allocations: 38.226 GiB, 11.83% gc time)


[34mMachine{DeterministicEnsembleModel{MulticlassPerceptronClassifier}} @ 1…10[39m


In [27]:
# Training-set accuracy of the ensemble. The predictions are compared against
# `y`, the raw integer labels, rather than the categorical `train_y`.
y_hat = predict(ensemble_machine,train_x_table);
print("accuracy:  $(mean(y_hat .== y))")

accuracy:  0.9395166666666667

In [28]:
# Test-set accuracy of the ensemble: build the test table from a materialised
# copy of the transposed test matrix, predict, and compare with `test_y`.
test_x_table = Tables.table(copy(test_x'))
y_hat_test   = predict(ensemble_machine, test_x_table);
print("accuracy:  $(mean(y_hat_test .== test_y))")

accuracy:  0.9257

Notice that an ensemble with bagging is simply a collection of models whose individual predictions are combined into the final prediction (for a deterministic classifier such as this one, MLJ takes a majority vote; ordinary averaging is used for regressors).

In [29]:
# Inspect the first two fitted members stored in the ensemble's fit result.
ensemble_machine.fitresult.ensemble[1:2]

2-element Array{Tuple{MulticlassPerceptronCore{Float32},MLJBase.CategoricalDecoder{Int64,UInt32}},1}:
 (MulticlassPerceptronCore{Float32}(Float32[0.64297223 0.008398652 … 0.6292795 0.15694559; 0.052143455 0.6262623 … 0.9221982 0.85320485; … ; 0.06592238 0.9153894 … 0.26662958 0.32635856; 0.9947995 0.8695295 … 0.9973707 0.5885484], Float32[-79.71893, 45.91424, 21.676311, -22.243063, 13.311617, 91.76496, -31.114058, 58.082783, -87.00599, -10.667681], 10, 784, false), MLJBase.CategoricalDecoder{Int64,UInt32}(CategoricalPool{Int64,UInt32}([1,2,3,4,5,6,7,8,9,10]), [2, 4, 6, 7, 3, 1, 8, 9, 10, 5]))
 (MulticlassPerceptronCore{Float32}(Float32[0.123441815 0.4488144 … 0.7582301 0.9748217; 0.8331721 0.5930548 … 0.15318418 0.38802278; … ; 0.48523808 0.53570056 … 0.71892035 0.8819529; 0.86679924 0.1892097 … 0.6188264 0.02111578], Float32[-63.770912, 32.701706, 14.63555, -15.967604, 6.7843504, 102.18117, -30.627605, 52.21907, -77.3212, -20.834476], 10, 784, false), MLJBase.CategoricalDecoder{Int64,

We can see that bagging does not improve the results much for the MulticlassPerceptron.

In [30]:
# Baseline for comparison: one non-bagged classifier, scored on the test split.
# The accuracy is the last expression, so the notebook displays it.
p = MulticlassPerceptronClassifier(; n_epochs=50, f_average_weights=true)
fitresult, _, _ = fit(p, 1, train_x, train_y)
y_test_preds = predict(fitresult, test_x)
mean(y_test_preds .== test_y)

0.9268