In [None]:
using Pkg
# Activate the project environment located in the parent directory.
Pkg.activate("..")
# Install the dependencies recorded for the active environment.
Pkg.instantiate()
# NOTE(review): updating on every run can change the resolved package versions
# between runs — confirm that this is intended for a reproducible notebook.
Pkg.update()

In [None]:
using Random

# Seed the default random number generator so that code relying on it behaves
# the same way on every run.
Random.seed!(1235)

## Interpretable land cover classification with modal decision trees (extra)
Reference paper: [Interpretable land cover classification with modal decision trees](https://www.tandfonline.com/doi/pdf/10.1080/22797254.2023.2262738)

To run this notebook, you first need to download the following
datasets and place them in the `datasets/paviaU` folder, located one level above
this notebook (i.e., `../datasets/paviaU`):
- [Pavia University](https://www.ehu.eus/ccwintco/uploads/e/ee/PaviaU.mat)
- [Pavia University GT](https://www.ehu.eus/ccwintco/uploads/5/50/PaviaU_gt.mat)


In [None]:
# Load the land-cover helper script; `LandCoverDataset` is presumably defined there.
include("../scripts/land-cover.jl")
# NOTE(review): `data_dir` is not referenced anywhere else in this notebook;
# presumably the included script reads it as a global when loading the data — confirm.
data_dir = "../datasets/"

# Build the Pavia University dataset: `X_df` receives the features, `y` the labels.
# NOTE(review): the exact meaning of the keywords below is defined by
# `LandCoverDataset` in the included script — check there before changing them.
# The trailing `;` suppresses the cell output.
X_df, y = LandCoverDataset(
    "Pavia University";
    window_size          = 3,
    ninstances_per_class = 40,
    pad_window_size      = 5,
);

In [None]:
# Tabulate the labels in `y` to inspect the class distribution.
# NOTE(review): `countmap` is not imported in this notebook; presumably it is
# brought into scope by the included script — confirm.
countmap(y)

In [None]:
# Apply `length` to every entry of `X_df` to see how many values each entry holds.
length.(X_df)

In [None]:
# Convert every entry of `X_df` to a dense `Matrix{Float64}`.
X_df = Matrix{Float64}.(X_df)

In [None]:
using DataFrames

# Let's unwind the spatial axes: every entry of `X_df` is a small window, and each
# pixel of that window becomes its own scalar column in `X_df_static`.
X_windows = Matrix(X_df)
# Derive the window shape from the data instead of hard-coding a 3×3 window.
win_h, win_w = size(first(X_windows))
cols = AbstractVector[]
for var_windows in eachcol(X_windows)
    # Stack the per-instance windows along a third axis: (win_h, win_w, ninstances).
    var_unroll = cat(var_windows...; dims = 3)
    # One row per pixel, one entry per instance. `reshape` is column-major, so the
    # rows are ordered (1,1), (2,1), ..., (win_h,1), (1,2), ...
    append!(cols, eachrow(reshape(var_unroll, (win_h * win_w, nrow(X_df)))))
end
# The column names must follow the same column-major pixel order as the rows
# above: the row index `i` varies fastest, so that the label `[i][j]` really names
# pixel `(i, j)` of the window.
X_df_static = DataFrame(
    cols,
    ["$n[$i][$j]" for n in names(X_df) for j in 1:win_w for i in 1:win_h]
)

In [None]:
using MultiData

# Wrap the windowed table (`X_df`) and the flattened table (`X_df_static`) into a
# single dataset. NOTE(review): presumably each table is treated as a separate
# modality — confirm against the MultiData documentation.
X_multimodal = MultiModalDataset([X_df, X_df_static])

In [None]:
using ModalDecisionTrees

# Instantiate the modal decision tree model with the `:RCC8` relation set.
# NOTE(review): presumably the RCC8 topological relations between regions —
# confirm against the ModalDecisionTrees documentation.
model = ModalDecisionTree(; relations = :RCC8)

In [None]:
using MLJ

# Bind the model to the data in an MLJ machine.
# NOTE(review): `scitype_check_level=0` presumably turns off MLJ's scientific-type
# checks for this non-standard table — confirm.
modalmach = machine(model, X_multimodal, y; scitype_check_level=0)

In [None]:
# Train the machine in place on the data bound to it.
fit!(modalmach)

In [None]:
# Show the tree stored in the machine's fitted parameters.
fitted_params(modalmach).tree

In [None]:
# The learned tree, as exposed by the machine's report.
🌱 = report(modalmach).model

In [None]:
using SoleModels

# Extract the list of rules from the tree.
🌲 = listrules(🌱)

In [None]:
# Every symbolic model (including the rules in a rule list) can have additional
# information attached
println(🌲[1])

ruleinfo = SoleModels.info(🌲[1])
println(keys(ruleinfo))

In [None]:
# Number of supporting predictions stored with the first rule.
ruleinfo[:supporting_predictions] |> length

In [None]:
# Metrics of every rule, sorted by decreasing coverage.
sort(readmetrics.(🌲), by=x->x[:coverage], rev = true)

In [None]:
metricstable(🌲)

**Extra**: let's retrain our model, this time using cross-validation! (It will
take some time...)

In [None]:
# If you have more time, train in cross-validation!
# A fresh machine is evaluated with stratified, shuffled 2-fold resampling; the
# explicitly seeded RNG is passed to the resampling strategy.
e = let mach = machine(model, X_multimodal, y; scitype_check_level = 0),
        cv = StratifiedCV(; nfolds = 2, shuffle = true, rng = Random.Xoshiro(1))

    evaluate!(
        mach;
        resampling    = cv,
        measures      = [accuracy],
        verbosity     = 0,
        check_measure = false,
    )
end

In [None]:
# Test accuracies per fold (one value per fold for the `accuracy` measure
# requested in the evaluation above).
e.per_fold

In [None]:
# For every fold, apply that fold's `sprinkle` function (taken from the fold's
# report) to the held-out rows and labels, and keep the tree it returns.
# NOTE(review): presumably this is the fold's tree re-evaluated on its test set —
# confirm against the ModalDecisionTrees documentation.
dtrees = [
    begin
        _, tree_test = rep.sprinkle(
            slicedataset(X_multimodal, test_idxs),
            y[test_idxs];
            simplify = true,
        )
        tree_test
    end
    for ((train_idxs, test_idxs), rep) in zip(e.train_test_rows, e.report_per_fold)
]

In [None]:
# Extract the rules of every per-fold tree and concatenate them into one list.
ruleslist = vcat(listrules.(dtrees)...)

In [None]:
# Every symbolic model (including the rules in `ruleslist`) can have additional
# information attached
println(ruleslist[1])

# Fetch the information attached to the first rule and list the available keys.
ruleinfo = SoleModels.info(ruleslist[1])
println(keys(ruleinfo))

In [None]:
# Number of supporting predictions stored with the first rule.
ruleinfo[:supporting_predictions] |> length

In [None]:
# Metrics of every rule, sorted by decreasing coverage.
sort(readmetrics.(ruleslist), by=x->x[:coverage], rev = true)

In [None]:
# Order the rules by decreasing coverage, then print each one with its metrics.
goodrules = sort(ruleslist; by = rule -> readmetrics(rule)[:coverage], rev = true)
foreach(goodrules) do rule
    printmodel(rule; show_metrics = true, threshold_digits = 4)
end

**Exercise**: (if you have time) try with 10 folds, i.e., set `nfolds = 10` in the `StratifiedCV` call above!