In [2]:
using Statistics
using MLJBase, CategoricalArrays
using MLJ

# We use flux only to get the MNIST
using Flux, Flux.Data.MNIST

#push!(LOAD_PATH, "../src/") ## Uncomment if MulticlassPerceptron not installed
using MulticlassPerceptron


┌ Info: Precompiling MulticlassPerceptron [dab37cba-9818-490d-9918-279965c31300]
└ @ Base loading.jl:1278
ERROR: LoadError: LoadError: UndefVarError: MLJModelInterface not defined
Stacktrace:
 [1] top-level scope at /Users/davidbuchaca/.julia/packages/MLJModelInterface/lb8aH/src/metadata_utils.jl:101
 [2] eval at ./boot.jl:331 [inlined]
 [3] eval(::Expr) at /Users/davidbuchaca/Documents/git_stuff/MulticlassPerceptron.jl/src/MulticlassPerceptron.jl:1
 [4] metadata_model(::Type{T} where T; input::Type{T} where T, target::Type{T} where T, output::Type{T} where T, weights::Bool, descr::String, path::String) at /Users/davidbuchaca/.julia/packages/MLJModelInterface/lb8aH/src/metadata_utils.jl:110
 [5] top-level scope at /Users/davidbuchaca/Documents/git_stuff/MulticlassPerceptron.jl/src/mlj/interface.jl:152
 [6] include(::Function, ::Module, ::String) at ./Base.jl:380
 [7] include at ./Base.jl:368 [inlined]
 [8] include(::String) at /Users/davidbuchaca/Documents/git_stuff/MulticlassPerceptro

LoadError: Failed to precompile MulticlassPerceptron [dab37cba-9818-490d-9918-279965c31300] to /Users/davidbuchaca/.julia/compiled/v1.5/MulticlassPerceptron/UgIHs_FaPdK.ji.

## Loading the data



In [None]:
"""
    load_MNIST(; array_eltype::DataType=Float32, verbose::Bool=true)

Load the MNIST train and test splits through `Flux.Data.MNIST`.

Every image is flattened into a single column, so the feature matrices are laid out
as `(n_features, n_examples)`. The labels returned by `MNIST.labels` are shifted by
one and wrapped in a `CategoricalArray`.

# Keywords
- `array_eltype`: element type the pixel values are converted to.
- `verbose`: when `true`, print progress messages and the elapsed loading time.

# Returns
The tuple `(train_x, train_y, test_x, test_y)`.
"""
function load_MNIST(; array_eltype::DataType=Float32, verbose::Bool=true)
    time_init = time()
    verbose && println("\nMNIST Dataset Loading...")

    train_imgs = MNIST.images(:train)  # size(train_imgs) -> (60000,)
    test_imgs  = MNIST.images(:test)   # size(test_imgs)  -> (10000,)

    # `reduce(hcat, ...)` rather than `hcat(xs...)`: splatting tens of thousands of
    # arrays into one call is slow to compile and allocates needlessly.
    train_x = array_eltype.(reduce(hcat, vec.(train_imgs)))  # size(train_x) -> (784, 60000)
    test_x  = array_eltype.(reduce(hcat, vec.(test_imgs)))   # size(test_x)  -> (784, 10000)

    ## Shift the labels by one and encode them as CategoricalArray objects
    train_y = CategoricalArray(MNIST.labels(:train) .+ 1)
    test_y  = CategoricalArray(MNIST.labels(:test) .+ 1)

    if verbose
        time_taken = round(time() - time_init; digits=3)
        println("\nMNIST Dataset Loaded, it took $time_taken seconds")
    end
    return train_x, train_y, test_x, test_y
end

# Load both MNIST splits as Float32 feature matrices with categorical targets.
println("\nLoading data\n")
train_x, train_y, test_x, test_y = load_MNIST( ;array_eltype=Float32, verbose=true)



Loading data


MNIST Dataset Loading...


┌ Info: Downloading MNIST dataset
└ @ Flux.Data.MNIST /Users/davidbuchaca/.julia/packages/Flux/05b38/src/data/mnist.jl:24
┌ Info: Downloading MNIST dataset
└ @ Flux.Data.MNIST /Users/davidbuchaca/.julia/packages/Flux/05b38/src/data/mnist.jl:24


In [3]:
## Problem dimensions, read off the training data
n_features = first(size(train_x));
n_classes  = length(unique(train_y));
## Plain integer labels (shifted by one) for the core `fit!` call
y = MNIST.labels(:train) .+ 1;

## MulticlassPerceptronCore Object

The simplest way to train a MulticlassPerceptron is using the `MulticlassPerceptronCore` struct that simply stores the basic information of the model.

```julia
mutable struct MulticlassPerceptronCore{T}
    W::AbstractMatrix{T}
    b::AbstractVector{T}
    n_classes::Int
    n_features::Int
    is_sparse::Bool
end
```

In [4]:
# Core perceptron with Float32 parameters, sized from the training data.
is_sparse = false
perceptron = MulticlassPerceptronCore(Float32, n_classes, n_features, is_sparse) 

MulticlassPerceptronCore{Float32}(Float32[0.5650226 0.46391666 … 0.6035713 0.39280367; 0.8111923 0.70772016 … 0.46389484 0.84303176; … ; 0.2729447 0.10912049 … 0.14752984 0.23505485; 0.43693638 0.12277317 … 0.7287166 0.9326323], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 10, 784, false)

In [5]:
# Train the core perceptron in place on the integer labels `y`.
fit!(perceptron, train_x, y; verbosity=1, n_epochs=50, f_average_weights=true)

[KEpoch: 50 	 Accuracy: 0.898

10-element Array{Float32,1}:
 -58.42659 
  34.944298
  20.117249
 -21.463497
   5.943909
 102.04138 
 -32.88856 
  55.006344
 -91.90213 
 -13.372534

In [6]:
## Predict on both splits with the trained core perceptron
y_hat_train = predict(perceptron, train_x)
y_hat_test  = predict(perceptron, test_x);

## Report the fraction of correct predictions per split
println("Results:")
for (split, guesses, truth) in (("Train", y_hat_train, train_y), ("Test", y_hat_test, test_y))
    println(split, " accuracy:", mean(guesses .== truth))
end
println("\n")

Results:
Train accuracy:0.93545
Test accuracy:0.9265




## MulticlassPerceptronClassifier Object

The `MulticlassPerceptronClassifier` is the basic object built to take advantage of MLJ capabilities.

We can use `fit` with a CategoricalArray of the target (which can have any values, it is not restricted to values from 1 to the number of classes). 

In [7]:
## Problem dimensions and the MLJ model (50 epochs, averaged weights)
n_features = first(size(train_x));
n_classes  = length(unique(train_y));
perceptron = MulticlassPerceptronClassifier(; n_epochs=50, f_average_weights=true)

MulticlassPerceptronClassifier(n_epochs = 50,
                               n_epoch_patience = 5,
                               f_average_weights = true,
                               f_shuffle_data = false,
                               element_type = Float32,)[34m @ 1…15[39m

Training a `MulticlassPerceptronClassifier`

In [8]:
## Fit through the model-level `fit` and time the call
println("\nStart Learning\n")
time_init = time()
fitresult = first(fit(perceptron, 1, train_x, train_y))
time_taken = round(time() - time_init; digits=3)


Start Learning

[KEpoch: 50 	 Accuracy: 0.898

10.476

Make predictions

In [9]:
println("\nLearning took $time_taken seconds\n")

## Predict on both splits from the fit result
y_hat_train = predict(fitresult, train_x)
y_hat_test  = predict(fitresult, test_x);

## Report the fraction of correct predictions per split
println("Results:")
for (split, guesses, truth) in (("Train", y_hat_train, train_y), ("Test", y_hat_test, test_y))
    println(split, " accuracy:", mean(guesses .== truth))
end
println("\n")


Learning took 10.476 seconds

Results:
Train accuracy:0.9358166666666666
Test accuracy:0.9271




## Machine with `MulticlassPerceptronClassifier`



In [10]:
# Check the feature-matrix layout before wrapping it in a table.
size(train_x)

(784, 60000)

In [11]:
# Lazily flip the matrix so that each table row holds one example.
train_x_rowexamples = MLJBase.table(adjoint(train_x))

Tables.MatrixTable{LinearAlgebra.Adjoint{Float32,Array{Float32,2}}}(Symbol[:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10  …  :x775, :x776, :x777, :x778, :x779, :x780, :x781, :x782, :x783, :x784], Dict(:x753 => 753,:x620 => 620,:x233 => 233,:x71 => 71,:x110 => 110,:x685 => 685,:x348 => 348,:x630 => 630,:x539 => 539,:x608 => 608…), Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0])

In [12]:
# Fresh classifier (50 epochs, averaged weights) to wrap in a machine
perceptron = MulticlassPerceptronClassifier(; n_epochs=50, f_average_weights=true)

MulticlassPerceptronClassifier(n_epochs = 50,
                               n_epoch_patience = 5,
                               f_average_weights = true,
                               f_shuffle_data = false,
                               element_type = Float32,)[34m @ 9…09[39m

In [13]:
# Machines expect Tables.jl tables or DataFrame objects, not AbstractArrays
perceptron_machine = machine(perceptron, train_x_rowexamples, train_y)  

[34mMachine{MulticlassPerceptronClassifier} @ 5…88[39m


In [14]:
## Fit the machine on the data bound to it and report the wall-clock time
println("\nStart Learning\n")
time_init = time()
fit!(perceptron_machine)
time_taken = round(time() - time_init; digits=3)
println("\nLearning took $time_taken seconds\n")


Start Learning



┌ Info: Training [34mMachine{MulticlassPerceptronClassifier} @ 5…88[39m.
└ @ MLJ /Users/macpro/.julia/packages/MLJ/O4DUw/src/machines.jl:141


[KEpoch: 50 	 Accuracy: 0.898
Learning took 18.894 seconds



In [15]:
## Make predictions
# The machine was trained on a table with one row per example, so predict on data in
# that same layout instead of the raw (features, examples) matrices.
test_x_rowexamples = MLJBase.table(test_x')
y_hat_train = predict(perceptron_machine, train_x_rowexamples)
y_hat_test  = predict(perceptron_machine, test_x_rowexamples);

## Evaluate the model
println("Results:")
println("Train accuracy:", mean(y_hat_train .== train_y))
println("Test accuracy:",  mean(y_hat_test  .== test_y))
println("\n")

Results:
Train accuracy:0.9358
Test accuracy:0.926




## EnsembleModel

MLJ offers basic support for ensembling techniques such as bagging. 

For the MulticlassPerceptron as it stands, bagging does not make a lot of sense, but for other estimators it is a pretty interesting technique.

In order to define an Ensemble we need an "atomic" model.

In [16]:
using MLJ

In [17]:
using Tables

In [18]:
# Materialize the transposed matrix so that each table row holds one example.
train_x_table = Tables.table(permutedims(train_x))

Tables.MatrixTable{Array{Float32,2}}(Symbol[:Column1, :Column2, :Column3, :Column4, :Column5, :Column6, :Column7, :Column8, :Column9, :Column10  …  :Column775, :Column776, :Column777, :Column778, :Column779, :Column780, :Column781, :Column782, :Column783, :Column784], Dict(:Column42 => 42,:Column285 => 285,:Column714 => 714,:Column630 => 630,:Column167 => 167,:Column607 => 607,:Column693 => 693,:Column499 => 499,:Column52 => 52,:Column647 => 647…), Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0])

In [19]:
# Query the target scientific type the model declares to MLJ.
target_scitype(MulticlassPerceptronClassifier)

AbstractArray{#s26,1} where #s26<:Finite

In [20]:
# Atomic model for the ensemble, built with its default hyperparameters.
model        = MulticlassPerceptronClassifier()

MulticlassPerceptronClassifier(n_epochs = 100,
                               n_epoch_patience = 5,
                               f_average_weights = true,
                               f_shuffle_data = false,
                               element_type = Float32,)[34m @ 3…63[39m

In [21]:
# Bagging ensemble made of 10 copies of the atomic model
ensemble_model = EnsembleModel(; atom=model, n=10)

MLJ.DeterministicEnsembleModel(atom = MulticlassPerceptronClassifier(n_epochs = 100,
                                                                     n_epoch_patience = 5,
                                                                     f_average_weights = true,
                                                                     f_shuffle_data = false,
                                                                     element_type = Float32,),
                               weights = Float64[],
                               bagging_fraction = 0.8,
                               rng = Random._GLOBAL_RNG(),
                               n = 10,
                               acceleration = ComputationalResources.CPU1{Nothing}(nothing),
                               out_of_bag_measure = Any[],)[34m @ 5…16[39m

In [22]:
# Bind the ensemble to the tabular training data; `@time` reports the cost of this step.
@time ensemble_machine = machine(ensemble_model, train_x_table, train_y)

  0.340027 seconds (263.20 k allocations: 192.407 MiB, 11.63% gc time)


[34mMachine{DeterministicEnsembleModel{MulticlassPerceptronClassifier}} @ 3…81[39m


In [23]:
# Train the ensemble machine in place and time the call.
@time fit!(ensemble_machine)

┌ Info: Training [34mMachine{DeterministicEnsembleModel{MulticlassPerceptronClassifier}} @ 3…81[39m.
└ @ MLJ /Users/macpro/.julia/packages/MLJ/O4DUw/src/machines.jl:141



501.359605 seconds (325.28 M allocations: 73.528 GiB, 2.40% gc time)


[34mMachine{DeterministicEnsembleModel{MulticlassPerceptronClassifier}} @ 3…81[39m


In [24]:
# Training accuracy of the ensemble against the integer labels `y`
y_hat = predict(ensemble_machine, train_x_table);
print("accuracy:  ", mean(y_hat .== y))

accuracy:  0.9399666666666666

In [25]:
# Test accuracy of the ensemble, using the same row-per-example table layout
test_x_table = Tables.table(permutedims(test_x))
y_hat_test   = predict(ensemble_machine, test_x_table);
print("accuracy:  ", mean(y_hat_test .== test_y))

accuracy:  0.927

Notice that an ensemble with bagging is simply a collection of models whose individual predictions are combined into the final prediction. For a deterministic classifier such as this one, MLJ combines them by majority vote; for regressors it averages them.

In [26]:
# Look at the first two fitted members stored in the ensemble's fit result.
ensemble_machine.fitresult.ensemble[1:2]

2-element Array{Tuple{MulticlassPerceptronCore{Float32},MLJBase.CategoricalDecoder{Int64,UInt32}},1}:
 (MulticlassPerceptronCore{Float32}(Float32[0.4784634 0.3611853 … 0.86662626 0.67473185; 0.4584763 0.9323393 … 0.62220395 0.93152404; … ; 0.1678698 0.83101666 … 0.7610233 0.546931; 0.106662035 0.98610127 … 0.97552323 0.6315185], Float32[-60.929695, 32.70521, 8.1863, -21.584425, 5.50861, 96.8037, -22.855516, 66.690125, -90.95068, -13.573548], 10, 784, false), MLJBase.CategoricalDecoder{Int64,UInt32}(CategoricalPool{Int64,UInt32}([1,2,3,4,5,6,7,8,9,10]), [2, 4, 6, 7, 3, 1, 8, 9, 10, 5]))  
 (MulticlassPerceptronCore{Float32}(Float32[0.3944986 0.69161904 … 0.9976491 0.6305163; 0.15075302 0.7414079 … 0.0011826754 0.7201482; … ; 0.870644 0.7313173 … 0.5787585 0.49066257; 0.8292482 0.3163396 … 0.10351348 0.8882675], Float32[-61.932487, 21.746744, 29.859648, -26.7205, 15.197302, 96.541534, -33.92519, 54.036354, -85.9457, -8.8579855], 10, 784, false), MLJBase.CategoricalDecoder{Int64,UInt32}(C

We can see that bagging does not improve the results much for the MulticlassPerceptron.

In [27]:
# Single perceptron (50 epochs, averaged weights) as a baseline for the ensemble
p = MulticlassPerceptronClassifier(; n_epochs=50, f_average_weights=true)
fitresult = first(fit(p, 1, train_x, train_y));
y_test_preds = predict(fitresult, test_x);
mean(y_test_preds .== test_y)

[KEpoch: 50 	 Accuracy: 0.899

0.9264

### Composing Models