# Learning and inspecting a Modal Decision Tree

In [None]:
# Set up the package environment used by this notebook.
using Pkg
# Use the project located in the current directory
Pkg.activate(".")
# Install the project's dependencies if they are not available yet
Pkg.instantiate()
# NOTE(review): updating on every run may change package versions between runs — confirm this is intended
Pkg.update()
# Print the packages (and versions) of the active environment
Pkg.status()

In [None]:
# Import libraries for statistics & Machine Learning
using Random
using DataFrames
using MLJ
using Plots

In [None]:
# Import the Sole framework
using Sole

# Load an example time-series classification dataset as a tuple (DataFrame, Vector{String}).
# X holds one instance per row and y the label of each instance (later cells index both by row);
# the trailing `;` keeps the notebook from displaying the returned tuple.
X, y = Sole.load_arff_dataset("NATOPS");

In [None]:
# Display the dataset
X

In [None]:
# List the names of the dataset's columns (the variables)
names(X)

In [None]:
# Show one sample instance per class in a 2x3 grid: rows 1, 31, 61, ... are plotted,
# each titled with its own label.
# NOTE(review): assumes instances are stored in consecutive blocks of 30 per class — confirm.
class_samples = map(1:30:180) do i
    plot(collect(X[i, :]); labels = nothing, title = y[i])
end
plot(class_samples...; layout = (2, 3), size = (1500, 400))

In [None]:
# All instances, grouped per class: one subplot per block of `class_size` consecutive rows,
# titled with the label of the block's first row.
# NOTE(review): assumes instances are stored in consecutive blocks of 30 per class — confirm.
class_size = 30
class_plots = map(1:class_size:180) do first_idx
    # Ranges are inclusive on both ends, so a block of `class_size` rows ends at
    # first_idx + class_size - 1 (ending at first_idx + class_size would also pick up
    # the first row of the next block, and run one row past row 180 for the last block).
    class_rows = first_idx:(first_idx + class_size - 1)
    plot(collect.(eachrow(X[class_rows, :])); labels = nothing, title = y[first_idx])
end
plot(class_plots...; layout = (2, 3), size = (1500, 400))

In [None]:
# Random train/test split with a fixed seed: the first 20% of a shuffled
# index permutation is used for training, the remaining 80% for testing.
N = nrow(X)
perm = randperm(Random.MersenneTwister(1), N)
n_train = round(Int, N * 0.2)
train_idxs = perm[1:n_train]
test_idxs = perm[(n_train + 1):end];
println("Using $(length(train_idxs)) instances for training")
println("Using $(length(test_idxs)) instances for testing")

In [None]:
# Load the classical decision tree learner from the DecisionTree package through MLJ
DecisionTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree verbosity=0

# Create a learner, passing no hyperparameters (i.e., relying on the defaults)
model = DecisionTreeClassifier()

In [None]:
# Bind data to learning algorithm
# NOTE(review): X is the raw dataset here, while later cells feed this same learner
# flattened/aggregated versions of it (X_static, X_mean, X_features) — presumably this cell
# shows that the classical tree cannot use the time series directly; confirm.
mach = machine(model, X, y)

# Train! (on the training rows only; @time reports how long the fit takes)
@time fit!(mach; rows=train_idxs)

In [None]:
# Flatten the dataset into a static (tabular) one: every time point of every variable
# becomes its own column, named "<variable>[<time index>]".
X_static = Matrix(X)
cols = AbstractVector[]
col_names = String[]
for (i_var, var_name) in enumerate(names(X))
    # Lay the series of all instances side by side: rows = time points, columns = instances
    var_unroll = reduce(hcat, X_static[:, i_var])
    # Each row (one time point across all instances) is one column of the flattened dataset
    append!(cols, eachrow(var_unroll))
    # Name the new columns after the actual series length rather than a hard-coded 51
    append!(col_names, ["$(var_name)[$(t)]" for t in axes(var_unroll, 1)])
end
X_static = DataFrame(cols, col_names)

In [None]:
# Wrap the flattened dataset and the tree learner into an MLJ machine
mach = machine(model, X_static, y)

# Fit on the training rows only, timing the call
@time fit!(mach, rows = train_idxs);

# Show the learned tree
fitted_params(mach).tree |> println

# Predict on the held-out rows and measure the fraction of correct predictions
yhat = predict_mode(mach, rows = test_idxs)
acc = MLJ.accuracy(yhat, y[test_idxs])
println("Accuracy: $(acc)")

In [None]:
# Summarize each entry of the dataset by its mean: one "mean(<variable>)" column per variable
mean_colnames = map(n -> "mean($n)", names(X))
X_mean = DataFrame(mean.(Matrix(X)), mean_colnames)

In [None]:
# Pair the tree learner with the per-variable means
mach = machine(model, X_mean, y)

# Learn from the training rows only (timed)
@time fit!(mach, rows = train_idxs);

# Print the resulting tree
fitted_params(mach).tree |> println

# Score the predictions made on the test rows
yhat = predict_mode(mach, rows = test_idxs)
acc = MLJ.accuracy(yhat, y[test_idxs])
println("Accuracy: $(acc)")

In [None]:
# Describe each variable by three scalar features: mean, maximum and minimum.
# Columns are laid out feature by feature: all means first, then all maxima, then all minima.
X_matrix = Matrix(X)
X_features = DataFrame(
    [
        eachcol(mean.(X_matrix))...,
        eachcol(maximum.(X_matrix))...,
        eachcol(minimum.(X_matrix))...,
    ],
    # The names must follow the same feature-major order as the columns above, so the
    # feature is the OUTER loop; with the variable as outer loop the names would read
    # mean(v1), max(v1), min(v1), ... and mislabel the columns.
    ["$f($n)" for f in ["mean", "max", "min"] for n in names(X)],
)

In [None]:
# Build a machine from the tree learner and the mean/max/min features
mach = machine(model, X_features, y)

# Training step, restricted to the training rows and timed
@time fit!(mach, rows = train_idxs);

# Inspect the learned tree
fitted_params(mach).tree |> println

# Accuracy of the predictions on the test rows
yhat = predict_mode(mach, rows = test_idxs)
acc = MLJ.accuracy(yhat, y[test_idxs])
println("Accuracy: $(acc)")

In [None]:
# Import the package providing the ModalDecisionTree learner
using ModalDecisionTrees

# Instantiate the learning algorithm
# NOTE(review): `relations = :IA7` presumably selects the set of interval relations the tree
# may use when comparing parts of a time series — confirm against the ModalDecisionTrees docs.
model = ModalDecisionTree(; relations = :IA7);

In [None]:
# Bind data to learning algorithm (the original dataset X, not a flattened or aggregated version)
mach = machine(model, X, y)

# Train! (on the training rows only; @time reports how long the fit takes)
@time fit!(mach; rows=train_idxs)

In [None]:
# Compute accuracy
# Predict a class for each test row, then compare with the true labels;
# the accuracy is the cell's last value, so the notebook displays it.
yhat = predict_mode(mach; rows=test_idxs)
MLJ.accuracy(yhat, y[test_idxs])

In [None]:
# Print the learned model through the `printmodel` entry of the machine's report
report(mach).printmodel()

In [None]:
# Retrieve the learned tree from the machine's report
tree_train = report(mach).model

# Convert the tree into a list of rules
ruleset = listrules(tree_train);

# Print each rule together with its metrics, labelling variables with the dataset's column names
printmodel.(
    ruleset;
    show_metrics = true,
    threshold_digits = 2,
    variable_names_map = [names(X)],
    parenthesize_atoms = false,
);

In [None]:
# Sprinkle the model with the test instances: this returns the predictions
# together with a tree from which test-time rules are extracted below.
predictions, tree_test = report(mach).sprinkle(X[test_idxs, :], y[test_idxs]);

# List the rules of the sprinkled tree and print them with their metrics
ruleset_test = listrules(tree_test)

printmodel.(
    ruleset_test;
    show_metrics = true,
    threshold_digits = 2,
    variable_names_map = [names(X)],
);

# In the classification scenario, rules for the same class can be joined via logical disjunction (∨)
joined_ruleset_test = joinrules(ruleset_test)
printmodel.(
    joined_ruleset_test;
    show_metrics = true,
    variable_names_map = [names(X)],
    threshold_digits = 3,
);