# Imports

In [1]:
using DataFrames
using CSV
using MLJ
using Flux
using Plots
using StatsBase
using MLJ: confusion_matrix

include("../../lib.jl")

ENV["LINES"]=30;

In [2]:
mkdir("./Figures")

LoadError: IOError: mkdir: file already exists (EEXIST)

# Datasets

In [3]:
data = CSV.read("data.csv");

In [4]:
describe(data)

Unnamed: 0_level_0,variable,mean,min,median,max,nunique,nmissing,eltype
Unnamed: 0_level_1,Symbol,Union…,Any,Union…,Any,Union…,Nothing,DataType
1,Class_Name,,B,,R,3.0,,String
2,Left_Weight,3.0,1,3.0,5,,,Int64
3,Left_Distance,3.0,1,3.0,5,,,Int64
4,Right_Weight,3.0,1,3.0,5,,,Int64
5,Right_Distance,3.0,1,3.0,5,,,Int64


Look at class labels to see if dataset is imbalanced

In [5]:
label_counts = countmap(data[:Class_Name])

Dict{String,Int64} with 3 entries:
  "B" => 49
  "L" => 288
  "R" => 288

In [6]:
collect(label_counts[i] / size(data)[1] for i in keys(label_counts))

3-element Array{Float64,1}:
 0.0784
 0.4608
 0.4608

Get data ready for training

In [7]:
coerce!(data, :Class_Name=>Multiclass,
              :Left_Weight=>Continuous,
              :Right_Weight=>Continuous,
              :Left_Distance=>Continuous,
              :Right_Distance=>Continuous)
schema(data)

┌[0m────────────────[0m┬[0m─────────────────────────────────[0m┬[0m───────────────[0m┐[0m
│[0m[22m _.names        [0m│[0m[22m _.types                         [0m│[0m[22m _.scitypes    [0m│[0m
├[0m────────────────[0m┼[0m─────────────────────────────────[0m┼[0m───────────────[0m┤[0m
│[0m Class_Name     [0m│[0m CategoricalValue{String,UInt32} [0m│[0m Multiclass{3} [0m│[0m
│[0m Left_Weight    [0m│[0m Float64                         [0m│[0m Continuous    [0m│[0m
│[0m Left_Distance  [0m│[0m Float64                         [0m│[0m Continuous    [0m│[0m
│[0m Right_Weight   [0m│[0m Float64                         [0m│[0m Continuous    [0m│[0m
│[0m Right_Distance [0m│[0m Float64                         [0m│[0m Continuous    [0m│[0m
└[0m────────────────[0m┴[0m─────────────────────────────────[0m┴[0m───────────────[0m┘[0m
_.nrows = 625


In [8]:
y, X = unpack(data, ==(:Class_Name), colname->true)

(CategoricalValue{String,UInt32}["B", "R", "R", "R", "R", "R", "R", "R", "R", "R"  …  "L", "L", "L", "L", "L", "L", "L", "L", "L", "B"], 625×4 DataFrame
│ Row │ Left_Weight │ Left_Distance │ Right_Weight │ Right_Distance │
│     │ [90mFloat64[39m     │ [90mFloat64[39m       │ [90mFloat64[39m      │ [90mFloat64[39m        │
├─────┼─────────────┼───────────────┼──────────────┼────────────────┤
│ 1   │ 1.0         │ 1.0           │ 1.0          │ 1.0            │
│ 2   │ 1.0         │ 1.0           │ 1.0          │ 2.0            │
│ 3   │ 1.0         │ 1.0           │ 1.0          │ 3.0            │
│ 4   │ 1.0         │ 1.0           │ 1.0          │ 4.0            │
│ 5   │ 1.0         │ 1.0           │ 1.0          │ 5.0            │
│ 6   │ 1.0         │ 1.0           │ 2.0          │ 1.0            │
│ 7   │ 1.0         │ 1.0           │ 2.0          │ 2.0            │
│ 8   │ 1.0         │ 1.0           │ 2.0          │ 3.0            │
│ 9   │ 1.0         │ 1.0           │

Partition train and test data accoring to class labels

In [9]:
# data to use when trying to fit a single validation set
train, test = partition(eachindex(y), 0.7, shuffle=true, rng=123, stratify=values(data[:Class_Name])) # gives 70:30 split

([595, 102, 55, 568, 425, 389, 146, 63, 372, 250  …  195, 500, 571, 533, 112, 396, 297, 106, 303, 261], [444, 144, 546, 43, 19, 173, 365, 423, 27, 218  …  293, 614, 90, 471, 13, 134, 296, 79, 395, 415])

In [10]:
train_counts = countmap(data[train,:Class_Name])
collect(train_counts[i] / size(train)[1] for i in keys(train_counts))

3-element Array{Float64,1}:
 0.0776255707762557
 0.4611872146118721
 0.4611872146118721

In [11]:
test_counts = countmap(data[test,:Class_Name])
collect(test_counts[i] / size(test)[1] for i in keys(test_counts))

3-element Array{Float64,1}:
 0.08021390374331551
 0.45989304812834225
 0.45989304812834225

# Five Learning Algorithms

* Decision trees with some form of pruning
* Neural networks
* Boosting
* Support Vector Machines
* k-nearest neighbors


##### Testing
* Implement the algorithms
* Design two *interesting* classification problems. For the purposes of this assignment, a classification problem is just a set of training examples and a set of test examples.

In [12]:
models(matching(X,y))

42-element Array{NamedTuple{(:name, :package_name, :is_supervised, :docstring, :hyperparameter_ranges, :hyperparameter_types, :hyperparameters, :implemented_methods, :is_pure_julia, :is_wrapper, :load_path, :package_license, :package_url, :package_uuid, :prediction_type, :supports_online, :supports_weights, :input_scitype, :target_scitype, :output_scitype),T} where T<:Tuple,1}:
 (name = AdaBoostClassifier, package_name = ScikitLearn, ... )
 (name = AdaBoostStumpClassifier, package_name = DecisionTree, ... )
 (name = BaggingClassifier, package_name = ScikitLearn, ... )
 (name = BayesianLDA, package_name = MultivariateStats, ... )
 (name = BayesianLDA, package_name = ScikitLearn, ... )
 (name = BayesianQDA, package_name = ScikitLearn, ... )
 (name = BayesianSubspaceLDA, package_name = MultivariateStats, ... )
 (name = ConstantClassifier, package_name = MLJModels, ... )
 (name = DecisionTreeClassifier, package_name = DecisionTree, ... )
 (name = DeterministicConstantClassifier, package_na

In [13]:
@load NeuralNetworkClassifier verbosity=2

import MLJFlux ✔
import 

┌ Info: Loading into module "Main": 
└ @ MLJModels /home/andrew/.julia/packages/MLJModels/mUBFt/src/loading.jl:70


MLJFlux ✔


NeuralNetworkClassifier(
    builder = Short(
            n_hidden = 0,
            dropout = 0.5,
            σ = NNlib.σ),
    finaliser = NNlib.softmax,
    optimiser = ADAM(0.001, (0.9, 0.999), IdDict{Any,Any}()),
    loss = Flux.crossentropy,
    epochs = 10,
    batch_size = 1,
    lambda = 0.0,
    alpha = 0.0,
    optimiser_changes_trigger_retraining = false)[34m @981[39m

## Neural networks
* Use favorite kind of network and training algorithm. 
* You may use networks of nodes with as many layers as you like and any activation function you see fit.

1. https://github.com/alan-turing-institute/MLJFlux.jl/blob/master/examples/boston.ipynb
1. https://github.com/alan-turing-institute/MLJFlux.jl
1. https://alan-turing-institute.github.io/MLJ.jl/dev/transformers/

In [14]:
# Define a custom network
mutable struct CustomNN <:MLJFlux.Builder
    n1 ::Int
    n2 ::Int
end

In [15]:
function MLJFlux.build(nn::CustomNN, n_in, n_out)
    return Chain(
        Flux.Dense(n_in, nn.n1, σ),
        Flux.Dense(nn.n1, nn.n2, σ),
        Flux.Dense(nn.n2, n_out, σ),
    )
end

In [16]:
layer1 = 80
layer2 = 40

40

In [17]:
batch_sz = 16;

In [18]:
max_epochs = 4000;

### No-preprocessing

In [19]:
nn = NeuralNetworkClassifier(builder=CustomNN(layer1,layer2))

NeuralNetworkClassifier(
    builder = CustomNN(
            n1 = 80,
            n2 = 40),
    finaliser = NNlib.softmax,
    optimiser = ADAM(0.001, (0.9, 0.999), IdDict{Any,Any}()),
    loss = Flux.crossentropy,
    epochs = 10,
    batch_size = 1,
    lambda = 0.0,
    alpha = 0.0,
    optimiser_changes_trigger_retraining = false)[34m @935[39m

In [20]:
Net = machine(nn, X, y)

[34mMachine{NeuralNetworkClassifier{CustomNN,…}} @518[39m trained 0 times.
  args: 
    1:	[34mSource @885[39m ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	[34mSource @853[39m ⏎ `AbstractArray{Multiclass{3},1}`


In [21]:
nn.optimiser.eta = 0.001
nn.epochs = 60
nn.batch_size = batch_sz
nn.lambda = 0.01

0.01

In [22]:
fit!(Net, rows=train, verbosity=2)

┌ Info: Training [34mMachine{NeuralNetworkClassifier{CustomNN,…}} @518[39m.
└ @ MLJBase /home/andrew/.julia/packages/MLJBase/uKzAz/src/machines.jl:319
┌ Info: Loss is 0.9775
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.957
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.9498
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.9453
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.9407
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.9341
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.9207
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.8923
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.853
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss

[34mMachine{NeuralNetworkClassifier{CustomNN,…}} @518[39m trained 1 time.
  args: 
    1:	[34mSource @885[39m ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	[34mSource @853[39m ⏎ `AbstractArray{Multiclass{3},1}`


In [23]:
nn.optimiser.eta = nn.optimiser.eta / 3
nn.epochs = nn.epochs + 40

100

In [24]:
fit!(Net, rows=train, verbosity=2)

┌ Info: Updating [34mMachine{NeuralNetworkClassifier{CustomNN,…}} @518[39m.
└ @ MLJBase /home/andrew/.julia/packages/MLJBase/uKzAz/src/machines.jl:320
┌ Info: Loss is 0.6575
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.6573
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.6572
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.657
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.6569
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.6567
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.6566
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.6564
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Loss is 0.6563
└ @ MLJFlux /home/andrew/.julia/packages/MLJFlux/rYILg/src/core.jl:95
┌ Info: Los

[34mMachine{NeuralNetworkClassifier{CustomNN,…}} @518[39m trained 2 times.
  args: 
    1:	[34mSource @885[39m ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	[34mSource @853[39m ⏎ `AbstractArray{Multiclass{3},1}`


In [25]:
nn_acc = evaluate!(Net, resampling=CV(shuffle=true), measure=[cross_entropy, acc], verbosity=1)



┌[0m───────────────[0m┬[0m───────────────[0m┬[0m───────────────────────────────────────────────────[0m┐[0m
│[0m[22m _.measure     [0m│[0m[22m _.measurement [0m│[0m[22m _.per_fold                                        [0m│[0m
├[0m───────────────[0m┼[0m───────────────[0m┼[0m───────────────────────────────────────────────────[0m┤[0m
│[0m cross_entropy [0m│[0m 0.664         [0m│[0m Float32[0.682, 0.686, 0.627, 0.655, 0.659, 0.677] [0m│[0m
│[0m acc           [0m│[0m 0.867         [0m│[0m [0.857, 0.846, 0.904, 0.875, 0.875, 0.846]        [0m│[0m
└[0m───────────────[0m┴[0m───────────────[0m┴[0m───────────────────────────────────────────────────[0m┘[0m
_.per_observation = [[[0.556, 0.556, ..., 0.583], [1.03, 0.553, ..., 0.553], [0.553, 0.554, ..., 0.554], [0.558, 1.15, ..., 0.556], [0.587, 0.585, ..., 0.553], [0.576, 0.556, ..., 1.28]], missing]
_.fitted_params_per_fold = [ … ]
_.report_per_fold = [ … ]


In [26]:
fitted_params(Net)

(chain = Chain(Chain(Dense(4, 80, σ), Dense(80, 40, σ), Dense(40, 3, σ)), softmax),)

In [27]:
report(Net)

(training_losses = Any[1.030618f0, 0.9969693f0, 0.97918415f0, 0.9686325f0, 0.96174383f0, 0.95690876f0, 0.95330346f0, 0.95046276f0, 0.9481032f0, 0.9460374f0  …  0.6602216f0, 0.65992695f0, 0.65963894f0, 0.6593573f0, 0.65908164f0, 0.65881205f0, 0.658548f0, 0.65828955f0, 0.6580363f0, 0.6577881f0],)

### With Preprocessing

Standardizing Inputs makes a huge difference.
Before, I could only barely break below 0.4 training loss, but by standardizing inputs, I can easily get near 0.3.

In [28]:
standardizer = Standardizer()
stand = machine(standardizer, X[train,:]) #only want to standardize on training distribution
fit!(stand)
X_stand = MLJ.transform(stand, X);

┌ Info: Training [34mMachine{Standardizer} @559[39m.
└ @ MLJBase /home/andrew/.julia/packages/MLJBase/uKzAz/src/machines.jl:319


In [29]:
Net = machine(nn, X_stand, y)

[34mMachine{NeuralNetworkClassifier{CustomNN,…}} @394[39m trained 0 times.
  args: 
    1:	[34mSource @298[39m ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	[34mSource @656[39m ⏎ `AbstractArray{Multiclass{3},1}`


### Learning Curve

In [30]:
Net.model.epochs = max_epochs
fit!(Net, rows=train, verbosity=1, force=true)

┌ Info: Training [34mMachine{NeuralNetworkClassifier{CustomNN,…}} @394[39m.
└ @ MLJBase /home/andrew/.julia/packages/MLJBase/uKzAz/src/machines.jl:319


[34mMachine{NeuralNetworkClassifier{CustomNN,…}} @394[39m trained 1 time.
  args: 
    1:	[34mSource @298[39m ⏎ `Table{AbstractArray{Continuous,1}}`
    2:	[34mSource @656[39m ⏎ `AbstractArray{Multiclass{3},1}`


In [None]:
nn_acc = evaluate!(Net, resampling=CV(shuffle=true), measure=[cross_entropy, acc], verbosity=1)



In [None]:
vals = collect(0:5:max_epochs)
r = range(nn, :epochs, values=vals)
# r = range(nn, :epochs, lower=0, upper=max_epochs)
curve = learning_curve(Net, 
                        range=r, 
                        resampling=Holdout(fraction_train=0.7), 
#                         resampling=CV(nfolds=4), 
                        measure=cross_entropy, 
                        acceleration=CPUThreads())

In [None]:
plot(curve.parameter_values,
     curve.measurements,
     xlab=curve.parameter_name,
     ylab="Cross Entropy",
     label="Validation", lw=2)
plot!(Net.report.training_losses, label="Training", lw=2)

In [None]:
a = round(nn.optimiser.eta, digits=5)
b = round(minimum(curve.measurements), digits=5)
best_epochs = curve.parameter_values[argmin(curve.measurements)]
a,b, best_epochs

In [None]:
fn = "Figures/LearningCurve_NN_hidden:$(layer1)x$(layer2)_epochs:$(nn.epochs)_lr:$(a)_loss:$(b)_labmda:$(nn.lambda)"
png(replace(fn,'.' => ','))

### GridSearch for Hidden Layer Size 1

In [None]:
Net = machine(nn, X_stand, y)

In [None]:
param1 = :epochs
param2 = :(builder.n1)
# param2 = :(builder.n2)

r1 = range(nn, param1, lower=10, upper=4000, scale=:log10)
# r1 = range(nn, param1, lower=1, upper=100, scale=:linear)
r2 = range(nn, param2, lower=5, upper=100, scale=:linear)

In [None]:
self_tuning_nn_model = TunedModel(model=nn,
                                    tuning=Grid(goal=64),
                                    resampling=Holdout(fraction_train=0.7), 
                                    measure=cross_entropy,
                                    acceleration=CPUThreads(),
                                    range=[r1, r2])

In [None]:
self_tuning_nn = machine(self_tuning_nn_model, X_stand, y)

In [None]:
z = fit!(self_tuning_nn, rows=train)

In [None]:
plot(self_tuning_nn)

In [None]:
best = fitted_params(self_tuning_nn)
best

In [None]:
best.best_model

In [None]:
best_loss = z.report.best_result.measurement[1]

In [None]:
best_n1 = best.best_model.builder.n1

In [None]:
fn = "Figures/Grid_NN_$(param1):$(best.best_model.epochs)_x_$(param2):$(best_n1)_bestloss:$(best_loss)"
f = replace(fn,'.' => ',')
png(f)

### GridSearch for Hidden Layer Size 2

In [None]:
nn = NeuralNetworkClassifier(builder=CustomNN(best_n1,layer2))

In [None]:
nn.optimiser.eta = 0.001
nn.epochs = 60
nn.batch_size = batch_sz
nn.lambda = 0.01

In [None]:
Net = machine(nn, X_stand, y)

In [None]:
param1 = :epochs
# param1 = :(builder.n1)
param2 = :(builder.n2)

r1 = range(nn, param1, lower=10, upper=4000, scale=:log10)
# r1 = range(nn, param1, lower=1, upper=100, scale=:linear)
r2 = range(nn, param2, lower=5, upper=100, scale=:linear)

In [None]:
self_tuning_nn_model = TunedModel(model=nn,
                                    tuning=Grid(goal=64),
                                    resampling=Holdout(fraction_train=0.7), 
                                    measure=cross_entropy,
                                    acceleration=CPUThreads(),
                                    range=[r1, r2])

In [None]:
self_tuning_nn = machine(self_tuning_nn_model, X_stand, y)

In [None]:
z = fit!(self_tuning_nn, rows=train)

In [None]:
plot(self_tuning_nn)

In [None]:
best = fitted_params(self_tuning_nn)
best

In [None]:
best.best_model

In [None]:
best_loss = z.report.best_result.measurement[1]

In [None]:
best_n2 = best.best_model.builder.n2

In [None]:
fn = "Figures/Grid_NN_$(param1):$(best.best_model.epochs)_x_$(param2):$(best_n2)_bestloss:$(best_loss)"
png(replace(fn,'.' => ','))

### GridSearch for Learning Rate

In [None]:
nn = NeuralNetworkClassifier(builder=CustomNN(best_n1,best_n2))

In [None]:
nn.optimiser.eta = 0.001
nn.epochs = 60
nn.batch_size = batch_sz
nn.lambda = 0.01

In [None]:
Net = machine(nn, X_stand, y)

In [None]:
fit!(Net, rows=train, verbosity=1, force=true)

In [None]:
param1 = :epochs
param2 = :(optimiser.eta)

r1 = range(nn, param1, lower=10, upper=4000, scale=:linear)
r2 = range(nn, param2, lower=0.0001, upper=0.1, scale=:log10)

In [None]:
self_tuning_nn_model = TunedModel(model=nn,
                                    tuning=Grid(goal=50),
                                    resampling=Holdout(fraction_train=0.7), 
                                    measure=cross_entropy,
                                    acceleration=CPUThreads(),
                                    range=[r1, r2])

In [None]:
self_tuning_nn = machine(self_tuning_nn_model, X_stand, y)

In [None]:
z = fit!(self_tuning_nn, rows=train)

In [None]:
plot(self_tuning_nn)

In [None]:
best = fitted_params(self_tuning_nn)
best

In [None]:
best.best_model

In [None]:
best_loss = z.report.best_result.measurement[1]

In [None]:
best_eta = round(best.best_model.optimiser.eta, digits=5)

In [None]:
fn = "Figures/Grid_NN_$(param1):$(best.best_model.epochs)_x_$(param2):$(best_eta)_bestloss:$(best_loss)_hidden:$b"
png(replace(fn,'.' => ','))

### GridSearch for Regularization

In [None]:
nn = NeuralNetworkClassifier(builder=CustomNN(best_n1,best_n2))

In [None]:
nn.optimiser.eta = best_eta
nn.epochs = 60
nn.batch_size = batch_sz
nn.lambda = 0.01

In [None]:
Net = machine(nn, X_stand, y)

In [None]:
param1 = :epochs
param2 = :lambda

r1 = range(nn, param1, lower=100, upper=4000, scale=:linear)
r2 = range(nn, param2, lower=0.0001, upper=10, scale=:log10)

In [None]:
self_tuning_nn_model = TunedModel(model=nn,
                                    tuning=Grid(goal=50),
                                    resampling=Holdout(fraction_train=0.7), 
                                    measure=cross_entropy,
                                    acceleration=CPUThreads(),
                                    range=[r1, r2])

In [None]:
self_tuning_nn = machine(self_tuning_nn_model, X_stand, y)

In [None]:
z = fit!(self_tuning_nn, rows=train)

In [None]:
plot(self_tuning_nn)

In [None]:
best = fitted_params(self_tuning_nn)
best

In [None]:
best.best_model

In [None]:
best_loss = z.report.best_result.measurement[1]

In [None]:
best_lambda = best.best_model.lambda

In [None]:
fn = "Figures/Grid_NN_$(param1):$(best.best_model.epochs)_x_$(param2):$(best_lambda)_bestloss:$(best_loss)"
png(replace(fn,'.' => ','))

### Learning Curve

In [None]:
lc_model = NeuralNetworkClassifier(builder=CustomNN(best_n1,best_n2))
lc_model.optimiser.eta = best_eta;
lc_model.lambda = best_lambda
lc_model.epochs = 4000
lc_model.batch_size = batch_sz
lc_model

In [None]:
# lc_model = NeuralNetworkClassifier(builder=CustomNN(80,80))
# lc_model.optimiser.eta = 0.001;
# lc_model.lambda = best_lambda
# lc_model.epochs = 3000
# lc_model.batch_size = 8

In [None]:
data_schedule, training_losses, valid_losses = learn_curve(lc_model, X[train,:], y[train], acc)

In [None]:
plot(data_schedule, training_losses)
plot!(data_schedule, valid_losses)

In [None]:
png("learning_curve_nn_3000epochs_balance")

# Final Results

In [None]:
final_model = NeuralNetworkClassifier(builder=CustomNN(best_n1,best_n2))

In [None]:
final_model.optimiser.eta = lc_model.optimiser.eta = best_eta;
final_model.batch_size = batch_sz
final_model.lambda = best_lambda
final_model.epochs = 3000

In [None]:
final_model = NeuralNetworkClassifier(builder=CustomNN(80,80))
final_model.optimiser.eta = 0.01;
final_model.lambda = 0.1
final_model.epochs = 3000
final_model.batch_size = batch_sz

### Test Performance

In [None]:
Final_Net = machine(final_model, X_stand, y)

In [None]:
fit!(Final_Net, rows=train, force=true, verbosity=1)

In [None]:
vals = collect(0:5:max_epochs)
r = range(final_model, :epochs, values=vals)
# r = range(nn, :epochs, lower=0, upper=max_epochs)
curve = learning_curve(Final_Net, 
                        range=r, 
                        resampling=Holdout(fraction_train=0.7), 
#                         resampling=CV(nfolds=4), 
                        measure=cross_entropy, 
                        acceleration=CPUThreads())

In [None]:
plot(curve.parameter_values,
     curve.measurements,
     xlab=curve.parameter_name,
     ylab="Cross Entropy",
     label="Validation", lw=2)
plot!(Final_Net.report.training_losses, label="Training", lw=2)

In [None]:
nn_acc = evaluate!(Final_Net, resampling=CV(shuffle=true), measure=[cross_entropy, acc], verbosity=1)

In [None]:
ŷ = MLJ.predict(Final_Net, X_stand[test,:]);

In [None]:
cross_entropy(ŷ, y[test]) |> mean

In [None]:
acc(ŷ, y[test])

In [None]:
confusion_matrix(mode.(ŷ), y[test])

In [None]:
fitted_params(Final_Net)

In [None]:
report(Final_Net)

A lot slower than Holdout

In [None]:
# self_tuning_nn_model = TunedModel(model=nn,
#                                     tuning=Grid(goal=50),
#                                     resampling=CV(), 
#                                     measure=cross_entropy,
#                                     acceleration=CPUThreads(),
#                                     range=[r_its, r_lr])

# self_tuning_nn = machine(self_tuning_nn_model, X_stand, y)

In [None]:
# fit!(self_tuning_nn)

In [None]:
# plot(self_tuning_nn)