# Setup models, dataset

In [None]:
using SpecialFunctions
using LinearAlgebra
using Random
using Distributions
using ABCRN

# Number of i.i.d. observations in every simulated dataset. It is read as a global by
# the `simulate` methods and by the closed-form evidences `lh_m1`, `lh_m2`, `lh_m3`.
# Declared `const` so those functions see a concretely typed value instead of an
# untyped global; to experiment with another size, edit the value and restart the
# session so every dependent function is recompiled against it.
const n = 20

# Field-less marker types, one per candidate model. They hold no data; their only role
# in this file is to let `simulate` dispatch on which model is being simulated.
# `Model` is not defined in this file — presumably the abstract model type provided by
# ABCRN (TODO confirm).
struct Model1 <: Model end
struct Model2 <: Model end
struct Model3 <: Model end
import ABCRN: simulate

"""
    simulate(m::Model1)

Draw one synthetic dataset of `n` observations from model 1.

A rate is first drawn from the `Exponential(1)` prior, then `n` observations are drawn
from an exponential distribution with that rate.

`Exponential` in Distributions.jl is parameterised by its scale (the mean), so the rate
has to be passed as its reciprocal. This keeps the simulator consistent with the
closed-form evidence `lh_m1`, which is the marginal likelihood of a rate-parameterised
exponential model, and with `simulate(::Model3)`, which already passes `1/param`.
"""
function simulate(m::Model1)
    rate = rand(Exponential(1))
    # scale = 1 / rate
    return rand(Exponential(1 / rate), n)
end
"""
    simulate(m::Model2)

Draw one synthetic dataset of `n` observations from model 2.

A location parameter is drawn from a standard normal prior, then `n` observations are
drawn from a log-normal distribution with that location and unit log-scale.
"""
function simulate(m::Model2)
    log_mean = rand(Normal())
    sampler = LogNormal(log_mean, 1)
    return rand(sampler, n)
end
"""
    simulate(m::Model3)

Draw one synthetic dataset of `n` observations from model 3.

A rate is drawn from the `Exponential(1)` prior, then `n` observations are drawn from
a gamma distribution with shape 2 and scale equal to the reciprocal of that rate.
"""
function simulate(m::Model3)
    rate = rand(Exponential(1))
    sampler = Gamma(2, inv(rate))
    return rand(sampler, n)
end

# One instance of each candidate model; these are the values handed to `simulate`
# and collected into `models` further down.
m1 = Model1()
m2 = Model2()
m3 = Model3()
"""
    lh_m1(s::Vector)

Closed-form evidence used for model 1, evaluated from the summary statistics `s`.

Only `s[1]` is read (the sum of the observations when `s` comes from `ss_func`).
The value is `gamma(n + 1) / (1 + s[1])^(n + 1)`, assembled on the log scale and
exponentiated at the end.
"""
function lh_m1(s::Vector)
    log_evidence = log(gamma(n + 1)) - (n + 1) * log(1 + s[1])
    return exp(log_evidence)
end
"""
    lh_m2(s::Vector)

Closed-form evidence of model 2 (log-normal observations with unit log-scale and a
standard normal prior on the location), evaluated from the summary statistics `s`.

Reads `s[2]` and `s[3]`, which are the sum of the logs and the sum of the squared logs
of the observations when `s` comes from `ss_func`.

Integrating the location out of the joint density gives

    (2pi)^(-n/2) * (n+1)^(-1/2) * exp(s[2]^2 / (2(n+1)) - s[3]/2 - s[2])

The sum of squared logs enters linearly: `s[3]` is already a sum of squares, so it must
not be squared again here.
"""
function lh_m2(s::Vector)
    sum_log, sum_log_sq = s[2], s[3]
    # -x^2/(2n(n+1)) + x^2/(2n) simplifies to x^2/(2(n+1)).
    log_kernel = sum_log^2 / (2 * (n + 1)) - sum_log_sq / 2 - sum_log
    return exp(log_kernel) * (2pi)^(-n / 2) * (n + 1)^(-1 / 2)
end
"""
    lh_m3(s::Vector)

Closed-form evidence used for model 3, evaluated from the summary statistics `s`.

Reads `s[1]` and `s[2]` (the sum of the observations and the sum of their logs when
`s` comes from `ss_func`) and returns
`exp(s[2]) * gamma(2n + 1) / gamma(2)^n * (1 + s[1])^(-2n - 1)`.
"""
function lh_m3(s::Vector)
    total, log_total = s[1], s[2]
    numerator = exp(log_total) * gamma(2n + 1)
    return numerator / gamma(2)^n * (1 + total)^(-2n - 1)
end

"""
    ss_func(y)

Reduce a sample `y` to three summary statistics, returned as a vector:
the sum of the values, the sum of their logarithms, and the sum of their squared
logarithms.
"""
function ss_func(y)
    log_y = log.(y)
    return [sum(y), sum(log_y), sum(log_y .^ 2)]
end
"""
    dist_l2(s_sim, s_obs)

Euclidean (L2) distance between a simulated and an observed summary-statistics vector.

The distance is the norm of the difference. Taking the square root of the dot product
of the two vectors is not a distance: it is non-zero for identical vectors and throws
a `DomainError` whenever the dot product is negative.
"""
dist_l2(s_sim, s_obs) = norm(s_sim .- s_obs)

# Pseudo-observed data: a single dataset simulated from model 3, reduced to its
# summary statistics with `ss_func`.
observations = simulate(m3)
ss_observations = ss_func(observations)
# Candidate models, in the order their indices are used below (1, 2, 3).
models = [m1, m2, m3]
# Build the training and test tables with ABCRN from the same models, observed
# summaries, summary function and distance.
# NOTE(review): the meaning of the two trailing integers (29000/29000 and 1000/1000)
# is not visible in this file — confirm against the ABCRN documentation.
abc_trainset = abc_model_choice_dataset(models, ss_observations, ss_func, dist_l2, 29000, 29000)
abc_testset = abc_model_choice_dataset(models, ss_observations, ss_func, dist_l2, 1000, 1000)

# Closed-form evidence functions, listed in the same order as `models` so that
# position k corresponds to model k. Read as a global by `prob_model` and `prob_model3`.
list_lh = [lh_m1, lh_m2, lh_m3]
"""
    prob_model(ss::Vector, list_lh, idx_model)
    prob_model(ss::Vector, idx_model)

Normalised weight of model `idx_model` for the summary statistics `ss`: the value of
`list_lh[idx_model]` at `ss` divided by the sum of every function of `list_lh`
evaluated at `ss`.

The two-argument method uses the global `list_lh`.
"""
function prob_model(ss::Vector, list_lh, idx_model)
    evidences = [lh(ss) for lh in list_lh]
    return evidences[idx_model] / sum(evidences)
end

function prob_model(ss::Vector, idx_model)
    return prob_model(ss, list_lh, idx_model)
end

"""
    prob_model3(ss::Vector)

Normalised weight of the third entry of the global `list_lh` for the summary
statistics `ss`.
"""
function prob_model3(ss::Vector)
    return prob_model(ss, list_lh, 3)
end


# Plot

In [None]:
using Plots

# Scatter plot of `prob_model3` evaluated on every column of the ABC test set's
# summary-statistics matrix. Points are grouped along the x axis and coloured by the
# model index recorded for each simulation.
# The data plotted comes from `abc_testset`, so the title says "Testset".
# NOTE(review): this cell reads `models_indexes` / `summary_stats_matrix`, whereas the
# classification cells read `X` / `y` from the same objects — confirm which field
# names the installed ABCRN version provides.
p = plot(title="Testset")
colors = ["black", "red", "green"]
begin_idx = 1
for i = 1:3
    # Columns of the test set that were simulated from model i.
    models_i = findall(==(i), abc_testset.models_indexes)
    end_idx = begin_idx + length(models_i) - 1
    probs_m3 = vec(mapslices(prob_model3, abc_testset.summary_stats_matrix[:, models_i], dims = 1))
    scatter!(p, begin_idx:end_idx, probs_m3,
             color = colors[i], markersize = 3.0, markershape = :cross, label = "Model $i")
    # `begin_idx` is a top-level variable; `global` is needed to rebind it from the loop.
    global begin_idx = end_idx + 1
end
p

# Classification models

In [None]:
using ScikitLearn
@sk_import linear_model: LogisticRegression
@sk_import ensemble: RandomForestClassifier
@sk_import metrics: (classification_report, confusion_matrix)
@sk_import neighbors: KNeighborsClassifier

# Feature matrices handed to scikit-learn: the transposes of the datasets' `X` fields.
# Presumably `X` stores one simulation per column, so the transpose gives one sample
# per row — TODO confirm against ABCRN.
X_trainset = transpose(abc_trainset.X)
X_testset = transpose(abc_testset.X)

# Baseline 1: logistic regression with default settings, scored on the test set.
logit_reg = fit!(LogisticRegression(), X_trainset, abc_trainset.y)
y_pred_logit = predict(logit_reg, X_testset)
classification_report(; y_true = abc_testset.y, y_pred = y_pred_logit) |> println

# Baseline 2: random forest with 500 trees, scored on the test set.
rf_clf = fit!(RandomForestClassifier(n_estimators=500), X_trainset, abc_trainset.y)
y_pred_rf = predict(rf_clf, X_testset)
classification_report(; y_true = abc_testset.y, y_pred = y_pred_rf) |> println

# Baseline 3: k-nearest-neighbours classifier with 20 neighbours, scored on the test set.
# The prediction and the report use the KNN model itself, not the random forest
# fitted just above.
knn_clf = fit!(KNeighborsClassifier(n_neighbors=20), X_trainset, abc_trainset.y)
y_pred_knn = predict(knn_clf, X_testset)
println(classification_report(y_pred = y_pred_knn, y_true = abc_testset.y))


# RF ABC

In [None]:
# Run ABCRN's random-forest model choice on the observed summaries, restricting the
# hyperparameter search to a single value of `n_estimators` (500).
res_rf = rf_abc_model_choice(
    models, ss_observations, ss_func, 29000;
    hyperparameters_range = Dict(:n_estimators => [500]),
)
# Score the returned classifier on the held-out test set. The predictions are computed
# once and reused for both reports.
let y_pred_abc = predict(res_rf.clf, X_testset)
    println(classification_report(y_pred = y_pred_abc, y_true = abc_testset.y))
    println(confusion_matrix(y_pred = y_pred_abc, y_true = abc_testset.y))
end


In [None]:
# Copy the fitted classifier's parameters into a dictionary keyed by `Symbol`, so they
# can be splatted as keyword arguments into a fresh `RandomForestClassifier`.
dict_params = Dict()
for (name, value) in get_params(res_rf.clf)
    dict_params[Symbol(name)] = value
end
RandomForestClassifier(; dict_params...)

In [None]:
# Compare a hand-computed out-of-bag accuracy with the score stored on the classifier.
# Each row of `oob_votes` is reduced to the index of its largest entry, and that index
# is compared with the labels of the reference table.
oob_votes = res_rf.clf.oob_decision_function_
y_pred_oob = [argmax(view(oob_votes, row, :)) for row in axes(oob_votes, 1)]
@show mean(y_pred_oob .== res_rf.reference_table.y)
@show res_rf.clf.oob_score_