In [1]:
using CSV, DataFrames, Statistics

In [2]:
prefix = "aistats-rev/"

"aistats-rev/"

In [3]:
function method_category(meth)
    if startswith(meth, "Imp-then-Reg")
        return "Imp-then-Reg"
    elseif startswith(meth, "Joint Imp-then-Reg")
        return "Joint Imp-then-Reg"
    elseif meth ∈ ["Static", "Affine", "Finite"]
        return "Adaptive LR"
    elseif startswith(meth, "Complete Features")
        return "Complete Features"
    else 
        return meth
    end
end

method_category (generic function with 1 method)

In [4]:
pb_datasets = ["cylinder-bands", "ozone-level-detection-eight", "ozone-level-detection-one", "thyroid-disease-thyroid-0387", "trains",
                "credit-approval", "Ecdat-Mofa", "sleep"]

8-element Vector{String}:
 "cylinder-bands"
 "ozone-level-detection-eight"
 "ozone-level-detection-one"
 "thyroid-disease-thyroid-0387"
 "trains"
 "credit-approval"
 "Ecdat-Mofa"
 "sleep"

## For Real X - Syn Y Experiments

In [15]:
prefix ="aistats-rev/"
setting = prefix*"fakey/"
for y_model in ["linear", "nn"]
    for m_model = ["mar", "nmar", "mar_adv"]
        dir = y_model*"_"*m_model*"/"
        directory = setting*dir
        
        filelist = [f for f in readdir(directory*"all/") if endswith(f, ".csv")]
        res = similar(CSV.read(directory*"all/"*filelist[1], DataFrame),0)
        
        for subdir = ["all/", "itr/"]
            filelist = [f for f in readdir(directory*subdir) if endswith(f, ".csv")]
#             res = similar(CSV.read(directory*subdir*filelist[1], DataFrame),0)
            for i in 1:length(filelist)
                res = vcat(res, CSV.read(directory*subdir*filelist[i], DataFrame))
            end
        end

    #     for subdir = ["xgb/"]
    #         try 
    #             filelist = [f for f in readdir("xgboost/"*directory*subdir) if endswith(f, ".csv")]
    #             for i in 1:length(filelist)
    #                 res = vcat(res, CSV.read("xgboost/"*directory*subdir*filelist[i], DataFrame))
    #             end
    #         catch 
    #             println("No XGBoost results for "*directory*subdir)
    #         end
    #     end

        res[!,:method_cat] = map(t -> method_category(t), res[:,:method])
        res[!,:X_setting] .= "real_X_"*m_model
        res[!,:Y_setting] .= "syn_Y_"*y_model

        CSV.write(directory*"FINAL_results.csv", res)
    end
end

In [16]:
setting = prefix*"fakey/"

for y_model in ["linear", "nn"]
    for m_model = ["mar", "nmar", "mar_adv"]
        dir = y_model*"_"*m_model*"/"
        directory = setting*dir
  
        res = CSV.read(directory*"FINAL_results.csv", DataFrame)
        
        res[!,:method] .= map(t -> (t == "Affine" ? "Adaptive LR - Affine" : t), res[:,:method]) 
        res[!,:method] .= map(t -> (t == "Finite" ? "Adaptive LR - Finite" : t), res[:,:method]) 
        res[!,:method] .= map(t -> (t == "Static" ? "Adaptive LR - Affine intercept only" : t), res[:,:method])
        
        filter!(t -> t[:dataset] ∉ pb_datasets, res)

        for method in ["Oracle X", "Oracle XM", "Complete Features", "Imp-then-Reg 1", "Imp-then-Reg 2", "Imp-then-Reg 3", "Imp-then-Reg 4", "Imp-then-Reg 5", "Joint Imp-then-Reg", "Adaptive LR"]
            aux = filter(t -> startswith(t[:method], method), res)

            idcols = [:dataset, :X_setting, :Y_setting, :SNR, :k, :kMissing, :splitnum]
            gd = groupby(aux, idcols)

            aux = similar(aux, 0)
            for subdf in gd 
                scoremax = argmax(subdf[:,:score])
                push!(aux, subdf[scoremax,names(aux)])
            end
            aux[!,:method] .= method*" - best"

            res = vcat(res, aux)
        end
        
        CSV.write(directory*"FINAL_results.csv", res)
    end
end

Sanity check

In [17]:
res = DataFrames.similar(CSV.read(prefix*"fakey/"*"linear"*"_"*"mar"*"/FINAL_results.csv", DataFrame), 0)
for y_model in ["linear", "nn"]
    for m_model = ["mar", "nmar", "mar_adv"]
        dir = y_model*"_"*m_model*"/"
        directory = prefix*"fakey/"*dir
  
        res = vcat(res, CSV.read(directory*"FINAL_results.csv", DataFrame))
    end
end

In [18]:
gd = groupby(res, [:dataset, :X_setting, :Y_setting, :kMissing, :method])
aggres = combine(gd, nrow)

Row,dataset,X_setting,Y_setting,kMissing,method,nrow
Unnamed: 0_level_1,String,String15,String15,Int64,String,Int64
1,COUNT-loomis,real_X_mar,syn_Y_linear,0,XGBoost,10
2,COUNT-loomis,real_X_mar,syn_Y_linear,0,Oracle X - linear,10
3,COUNT-loomis,real_X_mar,syn_Y_linear,0,Oracle XM - linear,10
4,COUNT-loomis,real_X_mar,syn_Y_linear,0,Complete Features - linear,10
5,COUNT-loomis,real_X_mar,syn_Y_linear,0,Oracle X - tree,10
6,COUNT-loomis,real_X_mar,syn_Y_linear,0,Oracle XM - tree,10
7,COUNT-loomis,real_X_mar,syn_Y_linear,0,Complete Features - tree,10
8,COUNT-loomis,real_X_mar,syn_Y_linear,0,Oracle X - rf,10
9,COUNT-loomis,real_X_mar,syn_Y_linear,0,Oracle XM - rf,10
10,COUNT-loomis,real_X_mar,syn_Y_linear,0,Complete Features - rf,10


In [22]:
feasible_combinations = unique(aggres[:,[:dataset, :kMissing]])
feasible_combinations = crossjoin(
                            crossjoin(
                                crossjoin(feasible_combinations, DataFrame(splitnum=1:10)),
                                DataFrame(X_setting = ["real_X_mar", "real_X_nmar", "real_X_mar_adv"])), 
                                DataFrame(Y_setting = ["syn_Y_linear", "syn_Y_nn"])
                            )

pbdatasets = antijoin(feasible_combinations, unique(res[:,[:dataset, :kMissing, :splitnum, :X_setting, :Y_setting, :method]]), on=[:dataset, :kMissing, :splitnum, :X_setting, :Y_setting])
pbdatasets = unique(pbdatasets[:,[:dataset, :X_setting, :Y_setting, :splitnum]])

dataset_list = [d for d in readdir("../datasets/") if !startswith(d, ".")]
sort!(dataset_list)

pbdatasets[!,:array_num] .= (pbdatasets[:,:splitnum] .- 1) .* 71
pbdatasets[!,:array_num] .+= map(t -> findfirst(t .== dataset_list)-1, pbdatasets[:,:dataset])

pbdatasets[!,:back_dnum] .= map(t -> dataset_list[mod(t, 71) + 1], pbdatasets[:,:array_num])
pbdatasets[!,:back_splitnum] .= map(t -> div(t, 71) + 1, pbdatasets[:,:array_num])

@assert all(pbdatasets[:,:back_dnum] .== pbdatasets[:,:dataset])
@assert all(pbdatasets[:,:back_splitnum] .== pbdatasets[:,:splitnum])

In [23]:
function list_to_slurmarray(l)
    s = ""
    begin_seq = -1; last_seq = -1; is_sequence = false

    for an in l
        if an == last_seq + 1
            last_seq = an
        else 
            if is_sequence
                if begin_seq == last_seq
                    s *= string(begin_seq)*","
                else
                    s *= string(begin_seq)*"-"*string(last_seq)*","
                end
            end
            begin_seq = an
            last_seq = an
            is_sequence = true
        end
    end 
    s
end

list_to_slurmarray (generic function with 1 method)

In [24]:
for y_model in ["linear", "nn"]
    println(y_model)
    for m_model = ["mar", "nmar", "mar_adv"]
        println(m_model)
        aux = filter(t -> t[:Y_setting] == "syn_Y_"*y_model && t[:X_setting] == "real_X_"*m_model, pbdatasets)
        l = sort(unique(aux[:,:array_num]))
        @show list_to_slurmarray(l)
    end
    println()
end

linear
mar


list_to_slurmarray(l) = "47,95,114,189,260,331,402,473,544,592,615,"
nmar
list_to_slurmarray(l) = "24,47,118,189,256,260,327,331,402,473,544,591,615,663,"
mar_adv
list_to_slurmarray(l) = "23-24,47,118,189,260,331,402,473,544,615,"

nn
mar
list_to_slurmarray(l) = "47,95,118,165,189,260,331,402,449-450,473,520-521,544,591-592,615,662-663,"
nmar
list_to_slurmarray(l) = "24,47,95,118,166,189,237,260,308,331,379,402,450,473,521,544,592,615,663,"
mar_adv
list_to_slurmarray(l) = "23-24,47,95,118,166,189,237,260,308,331,378-379,402,449-450,473,520-521,544,591-592,615,662-663,"



## For Real Data Experiments

In [5]:
prefix

"aistats-rev/"

In [6]:
for directory = prefix .* ["realy/"]
    filelist = [f for f in readdir(directory*"all/") if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
    res = similar(CSV.read(directory*"all/"*filelist[1], DataFrame),0)
    
    # for subdir = ["2022-08-23/", "rf_mia/"]    
    for subdir = ["all/", "itr/", "jitr/"]
        filelist = [f for f in readdir(directory*subdir) if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
#         res = similar(CSV.read(directory*subdir*filelist[1], DataFrame),0)
        for i in 1:length(filelist)
            res = vcat(res, CSV.read(directory*subdir*filelist[i], DataFrame))
        end
    end

#     for subdir = ["xgb/"]
#         try 
#             filelist = [f for f in readdir("xgboost/"*directory*subdir) if endswith(f, ".csv")]
#             for i in 1:length(filelist)
#                 res = vcat(res, CSV.read("xgboost/"*directory*subdir*filelist[i], DataFrame))
#             end
#         catch 
#             println("No XGBoost results for "*directory*subdir)
#         end
#     end
    
    # filter!(t -> t[:k] > 0, res) #Remove dataset with only a bias term
    res[!,:method_cat] = map(method_category, res[:,:method])
    res[!,:X_setting] .= "real_X"
    res[!,:Y_setting] .= "real_Y"
    
    CSV.write(directory*"FINAL_results.csv", res)
end

Create `best` variant

In [7]:
res = CSV.read(prefix*"realy/"*"FINAL_results.csv", DataFrame)

filter!(t -> t[:dataset] ∉ pb_datasets, res)

res[!,:method] .= map(t -> (t == "Affine" ? "Adaptive LR - Affine" : t), res[:,:method]) 
res[!,:method] .= map(t -> (t == "Finite" ? "Adaptive LR - Finite" : t), res[:,:method]) 
res[!,:method] .= map(t -> (t == "Static" ? "Adaptive LR - Affine intercept only" : t), res[:,:method])

for method in ["Complete Features", "Imp-then-Reg 1", "Imp-then-Reg 2", "Imp-then-Reg 3", "Imp-then-Reg 4", "Imp-then-Reg 5", "Joint Imp-then-Reg", "Adaptive LR"]
    aux = filter(t -> startswith(t[:method], method), res)
    # @show size(aux)
    idcols = [:dataset, :SNR, :k, :kMissing, :splitnum]
    gd = groupby(aux, idcols)

    aux = similar(aux, 0)
    for subdf in gd 
        scoremax = argmax(subdf[:,:score])
        push!(aux, subdf[scoremax,names(aux)])
    end
    aux[!,:method] .= method*" - best"

    res = vcat(res, aux)
end

CSV.write(prefix*"realy/"*"FINAL_results.csv", res)

"aistats-rev/realy/FINAL_results.csv"

Sanity check

In [8]:
res = CSV.read(prefix*"realy/"*"FINAL_results.csv", DataFrame)
unique(filter( t-> t[:nrow] < 10, combine(groupby(res, [:dataset, :method]), nrow))[:,[:dataset, :nrow]])

Row,dataset,nrow
Unnamed: 0_level_1,String,Int64


In [9]:
feasible_combinations = unique(res[:,[:dataset]])
feasible_combinations = crossjoin(feasible_combinations, DataFrame(splitnum=1:10))

pbdatasets = leftjoin(feasible_combinations, res, on=[:dataset, :splitnum])
gd = groupby(pbdatasets, [:dataset, :splitnum])
sort(combine(gd, nrow), :nrow)

# unique(res[:,[:dataset, :kMissing, :splitnum, :X_setting, :Y_setting]]), on=[:dataset, :kMissing, :splitnum, :X_setting, :Y_setting])
# pbdatasets = unique(pbdatasets[:,[:dataset, :X_setting, :Y_setting, :splitnum]])

# dataset_list = [d for d in readdir("../datasets/") if !startswith(d, ".")]
# sort!(dataset_list)

# pbdatasets[!,:array_num] .= (pbdatasets[:,:splitnum] .- 1) .* 71
# pbdatasets[!,:array_num] .+= map(t -> findfirst(t .== dataset_list)-1, pbdatasets[:,:dataset])

# pbdatasets[!,:back_dnum] .= map(t -> dataset_list[mod(t, 71) + 1], pbdatasets[:,:array_num])
# pbdatasets[!,:back_splitnum] .= map(t -> div(t, 71) + 1, pbdatasets[:,:array_num])

# @assert all(pbdatasets[:,:back_dnum] .== pbdatasets[:,:dataset])
# @assert all(pbdatasets[:,:back_splitnum] .== pbdatasets[:,:splitnum])

Row,dataset,splitnum,nrow
Unnamed: 0_level_1,String,Int64,Int64
1,COUNT-loomis,1,41
2,COUNT-loomis,2,41
3,COUNT-loomis,3,41
4,COUNT-loomis,4,41
5,COUNT-loomis,5,41
6,COUNT-loomis,6,41
7,COUNT-loomis,7,41
8,COUNT-loomis,8,41
9,COUNT-loomis,9,41
10,COUNT-loomis,10,41


In [10]:
dataset_list = [d for d in readdir("../datasets/") if !startswith(d, ".")]
sort!(dataset_list)

71-element Vector{String}:
 "COUNT-loomis"
 "Ecdat-MCAS"
 "Ecdat-Males"
 "Ecdat-Mofa"
 "Ecdat-RetSchool"
 "Ecdat-Schooling"
 "MASS-Cars93"
 "MASS-Pima.tr2"
 "MASS-survey"
 "Zelig-coalition2"
 ⋮
 "thyroid-disease-allhyper"
 "thyroid-disease-allhypo"
 "thyroid-disease-allrep"
 "thyroid-disease-dis"
 "thyroid-disease-sick"
 "thyroid-disease-sick-euthyroid"
 "thyroid-disease-thyroid-0387"
 "trains"
 "wiki4he"

In [11]:
unique(filter(t -> t[:nrow] < 10, combine(groupby(filter(t -> !startswith(t[:method], "Imp-then-Reg 4 - linear"), res), [:dataset, :method]), nrow))[:,:dataset])

String[]

In [13]:
unfinished_datasets = filter(t -> t[:nrow] < 10, 
    combine(
        groupby(    combine(groupby(filter(t -> !startswith(t[:method], "Imp-then-Reg 4 - linear"), res), [:dataset, :method]), nrow), 
                    [:dataset]), 
        :nrow => minimum => :nrow))[:,:dataset]
sort([findfirst(i .== dataset_list)-1 for i in unfinished_datasets])

Any[]

In [14]:
unfinished_datasets = filter(t -> t[:nrow] < 10, 
    combine(
        groupby(    combine(groupby(res, [:dataset, :method]), nrow), 
                    [:dataset]), 
        :nrow => minimum => :nrow))[:,:dataset]
sort([findfirst(i .== dataset_list)-1 for i in unfinished_datasets])

Any[]

In [30]:
unfinished_datasets = filter(t -> t[:nrow] < 10, 
    combine(
        groupby(    combine(groupby(filter(t -> !startswith(t[:method], "Imp-then-Reg"), res), [:dataset, :method]), nrow), 
                    [:dataset]), 
        :nrow => minimum => :nrow))[:,:dataset]
sort([findfirst(i .== dataset_list)-1 for i in unfinished_datasets])

Any[]

In [None]:
setdiff(), pb_datasets)

# For Synthetic-Data Experiments

In [None]:
1+1

In [None]:
setting = prefix*"synthetic_discrete/"
# for y_model in ["linear", "tree", "nn"]
for y_model in ["linear", "nn"]
    for m_model = ["mar", "censoring"]
        dir = y_model*"_"*m_model*"/"
        directory = setting*dir
        
        filelist = [f for f in readdir(directory*"all/") if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
        res = similar(CSV.read(directory*"all/"*filelist[1], DataFrame),0)

        for subdir = ["all/"]
            filelist = [f for f in readdir(directory*subdir) if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
            @show filelist

    #         res = similar(CSV.read(directory*subdir*filelist[1], DataFrame),0)
            for i in 1:length(filelist)
                aux = CSV.read(directory*subdir*filelist[i], DataFrame)
                # if any(aux[:,:kMissing] .> 0)
                #     missingproba = unique(aux[aux[:,:pMissing] .> 0,:pMissing])[1]
                #     aux[!,:pMissing] .= missingproba
                # end
                try
                    res = vcat(res, aux)
                catch 
                    println("Error with ", directory*subdir*filelist[i])
                end
            end
        end

        # for subdir = ["xgb/"]
        #     try 
        #         filelist = [f for f in readdir("xgboost/"*directory*subdir) if endswith(f, ".csv")]
        #         for i in 1:length(filelist)
        #             res = vcat(res, CSV.read("xgboost/"*directory*subdir*filelist[i], DataFrame))
        #         end
        #     catch 
        #         println("No XGBoost results for "*directory*subdir)
        #     end
        # end

        res[!,:method] .= map(t -> replace(t, "Encoding as new category" => "Imp-then-Reg 4"), res[:,:method])
        res[!,:method] .= map(t -> replace(t, "Mode impute" => "Imp-then-Reg 5"), res[:,:method])

        # filter!(t -> t[:k] > 0, res) #Remove dataset with only a bias term
        res[!,:method_cat] = map(method_category, res[:,:method])
        res[!,:X_setting] .= "syn_X_"*m_model
        res[!,:Y_setting] .= "syn_Y_"*y_model

        for method in ["Imp-then-Reg 4", "Imp-then-Reg 5"]
            aux = filter(t -> startswith(t[:method], method), res)
            # @show nrow(aux)
            idcols = [:dataset, :X_setting, :Y_setting, :SNR, :k, :pMissing, :splitnum]
            gd = groupby(aux, idcols)

            aux = similar(aux, 0)
            for subdf in gd 
                scoremax = argmax(subdf[:,:score])
                # @show subdf[scoremax,names(aux)]
                push!(aux, subdf[scoremax,names(aux)])
            end
            aux[!,:method] .= method*" - best"

            res = vcat(res, aux)
        end

        CSV.write(directory*"FINAL_results.csv", res)   
    end
end

In [None]:
setting = prefix*"synthetic/"
files_with_issues = []
# for y_model in ["linear", "tree", "nn"]
for y_model in ["linear", "nn"]
    for m_model = ["mar", "censoring"]
        dir = y_model*"_"*m_model*"/"
        directory = setting*dir
        filelist = [f for f in readdir(directory*"all/") if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
        res = similar(CSV.read(directory*"all/"*filelist[1], DataFrame),0)
        for subdir = ["all/"]
            filelist = [f for f in readdir(directory*subdir) if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
    #         res = similar(CSV.read(directory*subdir*filelist[1], DataFrame),0)
            for i in 1:length(filelist)
                try
                    aux = CSV.read(directory*subdir*filelist[i], DataFrame)
                    if any(aux[:,:pMissing] .> 0)
                        missingproba = unique(aux[aux[:,:pMissing] .> 0,:pMissing])[1]
                        aux[!,:pMissing] .= missingproba
                    end
                    res = vcat(res, aux)
                catch 
                    println("Error with ", directory*subdir*filelist[i])
                    push!(files_with_issues, directory*subdir*filelist[i])
                end
            end
        end

        # for subdir = ["xgb/"]
        #     try 
        #         filelist = [f for f in readdir("xgboost/"*directory*subdir) if endswith(f, ".csv")]
        #         for i in 1:length(filelist)
        #             res = vcat(res, CSV.read("xgboost/"*directory*subdir*filelist[i], DataFrame))
        #         end
        #     catch 
        #         println("No XGBoost results for "*directory*subdir)
        #     end
        # end

        # filter!(t -> t[:k] > 0, res) #Remove dataset with only a bias term
        res[!,:method_cat] = map(method_category, res[:,:method])
        res[!,:X_setting] .= "syn_X_"*m_model
        res[!,:Y_setting] .= "syn_Y_"*y_model

        for method in ["Oracle X", "Oracle XM", "Complete Features", "Imp-then-Reg 1", "Imp-then-Reg 2", "Imp-then-Reg 3", "Imp-then-Reg 4", "Imp-then-Reg 5", "Joint Imp-then-Reg", "Adaptive LR"]
            aux = filter(t -> startswith(t[:method], method), res)

            idcols = [:dataset, :X_setting, :Y_setting, :SNR, :k, :pMissing, :splitnum]
            gd = groupby(aux, idcols)

            aux = similar(aux, 0)
            for subdf in gd 
                scoremax = argmax(subdf[:,:score])
                push!(aux, subdf[scoremax,names(aux)])
            end
            aux[!,:method] .= method*" - best"

            res = vcat(res, aux)
        end
        
        CSV.write(directory*"FINAL_results.csv", res)   
    end
end

In [None]:
setting

In [None]:
files_with_issues

In [None]:
# setting = "synthetic/"
# # for y_model in ["linear", "tree", "nn"]
# for y_model in ["linear", "nn"]
#     for m_model = ["mar", "censoring"]
#         dir = y_model*"_"*m_model*"/"
#         directory = setting*dir
        
#         filelist = [f for f in readdir(directory*"final/") if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
#         res = similar(CSV.read(directory*"final/"*filelist[1], DataFrame),0)

#         for subdir = ["high_n/"]
#             filelist = [f for f in readdir(directory*subdir) if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
#     #         res = similar(CSV.read(directory*subdir*filelist[1], DataFrame),0)
#             for i in 1:length(filelist)
#                 aux = CSV.read(directory*subdir*filelist[i], DataFrame)
#                 if any(aux[:,:pMissing] .> 0)
#                     missingproba = unique(aux[aux[:,:pMissing] .> 0,:pMissing])[1]
#                     aux[!,:pMissing] .= missingproba
#                 end
#                 try
#                     res = vcat(res, aux)
#                 catch 
#                     println("Error with ", directory*subdir*filelist[i])
#                 end
#             end
#         end
#         # filter!(t -> t[:k] > 0, res) #Remove dataset with only a bias term
#         res[!,:method_cat] = map(method_category, res[:,:method])
#         res[!,:X_setting] .= "syn_X_"*m_model
#         res[!,:Y_setting] .= "syn_Y_"*y_model

#         CSV.write(directory*"HIGHN_results.csv", res)   
#     end
# end

In [None]:
1+1

Sanity check

In [None]:
df = CSV.read(prefix*"synthetic/linear_mar/FINAL_results.csv", DataFrame) 
df[!,:setting] .= "1 - Lin-MAR"

aux = CSV.read(prefix*"synthetic/linear_censoring/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "2 - Lin-NMAR"
df = vcat(df, aux)
 
# aux = CSV.read(prefix*"synthetic/tree_mar/FINAL_results.csv", DataFrame) 
# aux[!,:setting] .= "3 - Tree-MAR"
# df = vcat(df, aux)

# aux = CSV.read(prefix*"synthetic/tree_censoring/FINAL_results.csv", DataFrame) 
# aux[!,:setting] .= "4 - Tree-NMAR"
# df = vcat(df, aux)

aux = CSV.read(prefix*"synthetic/nn_mar/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "5 - NN-MAR"
df = vcat(df, aux)

aux = CSV.read(prefix*"synthetic/nn_censoring/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "6 - NN-NMAR"
df = vcat(df, aux)

;

In [None]:
df = CSV.read(prefix*"synthetic_discrete/linear_mar/FINAL_results.csv", DataFrame) 
df[!,:setting] .= "1 - Lin-MAR"

aux = CSV.read(prefix*"synthetic_discrete/linear_censoring/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "2 - Lin-NMAR"
df = vcat(df, aux)
 
# aux = CSV.read(prefix*"synthetic_discrete/tree_mar/FINAL_results.csv", DataFrame) 
# aux[!,:setting] .= "3 - Tree-MAR"
# df = vcat(df, aux)

# aux = CSV.read(prefix*"synthetic_discrete/tree_censoring/FINAL_results.csv", DataFrame) 
# aux[!,:setting] .= "4 - Tree-NMAR"
# df = vcat(df, aux)

aux = CSV.read(prefix*"synthetic_discrete/nn_mar/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "5 - NN-MAR"
df = vcat(df, aux)

aux = CSV.read(prefix*"synthetic_discrete/nn_censoring/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "6 - NN-NMAR"
df = vcat(df, aux)

In [None]:
combine(groupby(df, [:dataset, :method, :setting]), nrow)

In [None]:
unique(combine(groupby(df, [:dataset, :method, :setting]), nrow)[:,:nrow])

In [None]:
filter( t-> t[:nrow] < 90, combine(groupby(res, [:dataset, :method, :setting]), nrow))

In [None]:
pbdata = unique(filter( t -> t[:nrow] < 90, combine(groupby(df, [:dataset, :method, :setting]), nrow))[:,[:method, :setting, :dataset]])

In [None]:
aux = filter( t -> t[:dataset] ∈ pbdata && t[:nrow] < 10, combine(groupby(df, [:dataset, :method, :setting, :pMissing]), nrow))
unique(aux[:,[:dataset, :setting, :pMissing, :nrow]])

In [None]:
filter(t -> t[:pMissing] == 0.1, unique(aux[:,[:dataset, :setting, :pMissing, :nrow]]))

In [None]:
filter(t -> t[:pMissing] == 0.3, unique(aux[:,[:dataset, :setting, :pMissing, :nrow]]))

In [None]:
filter(t -> startswith(t[:setting], "5"), unique(aux[:,[:dataset, :setting, :pMissing, :nrow]]))