In [1]:
using CSV, DataFrames, Statistics

In [2]:
# prefix = "aistats-rev/"
prefix = "tmlr-rev/"

"tmlr-rev/"

In [3]:
function method_category(meth)
    if startswith(meth, "Imp-then-Reg")
        return "Imp-then-Reg"
    elseif startswith(meth, "Joint Imp-then-Reg")
        return "Joint Imp-then-Reg"
    elseif meth ∈ ["Static", "Affine", "Finite"]
        return "Adaptive LR"
    elseif startswith(meth, "Complete Features")
        return "Complete Features"
    else 
        return meth
    end
end

method_category (generic function with 1 method)

In [4]:
pb_datasets = ["cylinder-bands", "ozone-level-detection-eight", "ozone-level-detection-one", "thyroid-disease-thyroid-0387", "trains",
                "credit-approval", "Ecdat-Mofa", "sleep"]

8-element Vector{String}:
 "cylinder-bands"
 "ozone-level-detection-eight"
 "ozone-level-detection-one"
 "thyroid-disease-thyroid-0387"
 "trains"
 "credit-approval"
 "Ecdat-Mofa"
 "sleep"

## For Real X - Syn Y Experiments

In [5]:
# prefix ="aistats-rev/"
prefix = "tmlr-rev/"

setting = prefix*"fakey/"
for y_model in ["linear", "nn"]
    for m_model = ["mar", "nmar", "mar_adv"]
        dir = y_model*"_"*m_model*"/"
        directory = setting*dir
        
        # filelist = [f for f in readdir(directory*"all/") if endswith(f, ".csv")]
        # res = similar(CSV.read(directory*"all/"*filelist[1], DataFrame),0)
        
        # for subdir = ["all/", "itr/", "itr_nn/"]
        filelist = [f for f in readdir(directory*"itr_nn/") if endswith(f, ".csv")]
        res = similar(CSV.read(directory*"itr_nn/"*filelist[1], DataFrame),0)
        
        for subdir = ["itr_nn/"]
            filelist = [f for f in readdir(directory*subdir) if endswith(f, ".csv")]
#             res = similar(CSV.read(directory*subdir*filelist[1], DataFrame),0)
            for i in 1:length(filelist)
                res = vcat(res, CSV.read(directory*subdir*filelist[i], DataFrame))
            end
        end

        res[!,:method_cat] = map(t -> method_category(t), res[:,:method])
        res[!,:X_setting] .= "real_X_"*m_model
        res[!,:Y_setting] .= "syn_Y_"*y_model

        CSV.write(directory*"FINAL_results.csv", res)
    end
end

In [6]:
setting = prefix*"fakey/"

for y_model in ["linear", "nn"]
    for m_model = ["mar", "nmar", "mar_adv"]
        dir = y_model*"_"*m_model*"/"
        directory = setting*dir
  
        res = CSV.read(directory*"FINAL_results.csv", DataFrame)
        
        res[!,:method] .= map(t -> (t == "Affine" ? "Adaptive LR - Affine" : t), res[:,:method]) 
        res[!,:method] .= map(t -> (t == "Finite" ? "Adaptive LR - Finite" : t), res[:,:method]) 
        res[!,:method] .= map(t -> (t == "Static" ? "Adaptive LR - Affine intercept only" : t), res[:,:method])
        
        filter!(t -> t[:dataset] ∉ pb_datasets, res)

        for method in ["Oracle X", "Oracle XM", "Complete Features", "Imp-then-Reg 1", "Imp-then-Reg 2", "Imp-then-Reg 3", "Imp-then-Reg 4", "Imp-then-Reg 5", "Joint Imp-then-Reg", "Adaptive LR"]
            aux = filter(t -> startswith(t[:method], method), res)

            idcols = [:dataset, :X_setting, :Y_setting, :SNR, :k, :kMissing, :splitnum]
            gd = groupby(aux, idcols)

            aux = similar(aux, 0)
            for subdf in gd 
                scoremax = argmax(subdf[:,:score])
                push!(aux, subdf[scoremax,names(aux)])
            end
            aux[!,:method] .= method*" - best"

            res = vcat(res, aux)
        end
        
        CSV.write(directory*"FINAL_results.csv", res)
    end
end

Sanity check

In [7]:
res = DataFrames.similar(CSV.read(prefix*"fakey/"*"linear"*"_"*"mar"*"/FINAL_results.csv", DataFrame), 0)
for y_model in ["linear", "nn"]
    for m_model = ["mar", "nmar", "mar_adv"]
        dir = y_model*"_"*m_model*"/"
        directory = prefix*"fakey/"*dir
  
        res = vcat(res, CSV.read(directory*"FINAL_results.csv", DataFrame))
    end
end

In [8]:
gd = groupby(res, [:dataset, :X_setting, :Y_setting, :kMissing, :method])
aggres = combine(gd, nrow)

Row,dataset,X_setting,Y_setting,kMissing,method,nrow
Unnamed: 0_level_1,String,String15,String15,Int64,String31,Int64
1,COUNT-loomis,real_X_mar,syn_Y_linear,0,Imp-then-Reg 1 - nn,10
2,COUNT-loomis,real_X_mar,syn_Y_linear,0,Imp-then-Reg 2 - nn,10
3,COUNT-loomis,real_X_mar,syn_Y_linear,0,Imp-then-Reg 3 - nn,10
4,COUNT-loomis,real_X_mar,syn_Y_linear,0,Imp-then-Reg 4 - nn,10
5,COUNT-loomis,real_X_mar,syn_Y_linear,0,Imp-then-Reg 5 - nn,10
6,Ecdat-MCAS,real_X_mar,syn_Y_linear,0,Imp-then-Reg 1 - nn,10
7,Ecdat-MCAS,real_X_mar,syn_Y_linear,0,Imp-then-Reg 2 - nn,10
8,Ecdat-MCAS,real_X_mar,syn_Y_linear,0,Imp-then-Reg 3 - nn,10
9,Ecdat-MCAS,real_X_mar,syn_Y_linear,0,Imp-then-Reg 4 - nn,10
10,Ecdat-MCAS,real_X_mar,syn_Y_linear,0,Imp-then-Reg 5 - nn,10


In [9]:
feasible_combinations = unique(aggres[:,[:dataset, :kMissing]])
feasible_combinations = crossjoin(
                            crossjoin(
                                crossjoin(feasible_combinations, DataFrame(splitnum=1:10)),
                                DataFrame(X_setting = ["real_X_mar", "real_X_nmar", "real_X_mar_adv"])), 
                                DataFrame(Y_setting = ["syn_Y_linear", "syn_Y_nn"])
                            )

pbdatasets = antijoin(feasible_combinations, unique(res[:,[:dataset, :kMissing, :splitnum, :X_setting, :Y_setting, :method]]), on=[:dataset, :kMissing, :splitnum, :X_setting, :Y_setting])
pbdatasets = unique(pbdatasets[:,[:dataset, :X_setting, :Y_setting, :splitnum]])

dataset_list = [d for d in readdir("../datasets/") if !startswith(d, ".")]
sort!(dataset_list)

pbdatasets[!,:array_num] .= (pbdatasets[:,:splitnum] .- 1) .* 71
pbdatasets[!,:array_num] .+= map(t -> findfirst(t .== dataset_list)-1, pbdatasets[:,:dataset])

pbdatasets[!,:back_dnum] .= map(t -> dataset_list[mod(t, 71) + 1], pbdatasets[:,:array_num])
pbdatasets[!,:back_splitnum] .= map(t -> div(t, 71) + 1, pbdatasets[:,:array_num])

@assert all(pbdatasets[:,:back_dnum] .== pbdatasets[:,:dataset])
@assert all(pbdatasets[:,:back_splitnum] .== pbdatasets[:,:splitnum])

In [10]:
function list_to_slurmarray(l)
    s = ""
    begin_seq = -1; last_seq = -1; is_sequence = false

    for an in l
        if an == last_seq + 1
            last_seq = an
        else 
            if is_sequence
                if begin_seq == last_seq
                    s *= string(begin_seq)*","
                else
                    s *= string(begin_seq)*"-"*string(last_seq)*","
                end
            end
            begin_seq = an
            last_seq = an
            is_sequence = true
        end
    end 
    s
end

list_to_slurmarray (generic function with 1 method)

In [11]:
for y_model in ["linear", "nn"]
    println(y_model)
    for m_model = ["mar", "nmar", "mar_adv"]
        println(m_model)
        aux = filter(t -> t[:Y_setting] == "syn_Y_"*y_model && t[:X_setting] == "real_X_"*m_model, pbdatasets)
        l = sort(unique(aux[:,:array_num]))
        @show list_to_slurmarray(l)
    end
    println()
end

linear
mar
list_to_slurmarray(l) = "47,118,166,189,237,256,260,327,331,402,473,544,611,615,"
nmar
list_to_slurmarray(l) = "47,118,256,260,331,402,473,540,544,615,682,686,"
mar_adv
list_to_slurmarray(l) = "47,118,185,189,256,260,331,402,473,544,611,615,"

nn
mar
list_to_slurmarray(l) = "47,114,118,189,256,260,327,331,402,473,611,615,"
nmar
list_to_slurmarray(l) = "47,118,185,189,256,260,331,402,473,544,615,"
mar_adv
list_to_slurmarray(l) = "47,118,189,256,260,331,402,473,544,615,686,"



In [None]:
47,114,118,166,185,189,237,256,260,327,331,402,473,540,544,611,615,682,686

In [54]:
for y_model in ["linear", "nn"]
    println(y_model)
    for m_model = ["mar", "nmar", "mar_adv"]
        println(m_model)
        aux = filter(t -> t[:Y_setting] == "syn_Y_"*y_model && t[:X_setting] == "real_X_"*m_model, pbdatasets)
        l = sort(unique(aux[:,:array_num]))
        @show list_to_slurmarray(l)
    end
    println()
end

linear
mar
list_to_slurmarray(l) = "49,120,166,185,189,191,237,256,260,262,308,327,331,333,379,398,402,404,450,469,473,475,521,540,544,546,592,611,615,617,663,682,686,"
nmar
list_to_slurmarray(l) = "49,120,165-166,189,191,236-237,260,262,307-308,327,331,333,347,378-379,398,402,404,418,449-450,469,473,475,489,491,520-521,540,544,546,560,562,591-592,611,615,617,631,633,662-663,682,686,688,702,"
mar_adv
list_to_slurmarray(l) = "24,95,120,166,189,191,237,256,260,262,308,327,331,333,379,398,402,404,450,469,473,475,521,540,544,546,592,611,615,617,663,682,686,"

nn
mar
list_to_slurmarray(l) = "49,95,118,120,166,185,189,191,237,256,260,262,308,327,331,333,379,398,402,404,450,469,473,475,521,540,544,546,592,611,615,617,663,682,686,"
nmar
list_to_slurmarray(l) = "49,120,189,191,260,262,327,331,333,398,402,404,469,473,475,540,544,546,592,611,615,617,662-663,682,686,"
mar_adv
list_to_slurmarray(l) = "24,49,67,95,118,120,138,166,189,191,209,237,260,262,280,308,331,333,351,379,398,402,404,422,450,46

## For Real Data Experiments

In [12]:
prefix

"tmlr-rev/"

In [9]:
for directory = prefix .* ["realy/"]
    filelist = [f for f in readdir(directory*"all/") if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
    res = similar(CSV.read(directory*"all/"*filelist[1], DataFrame),0)
    
    for subdir = ["all/", "itr/", "jitr/", "itr_nn/"]
        filelist = [f for f in readdir(directory*subdir) if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
#         res = similar(CSV.read(directory*subdir*filelist[1], DataFrame),0)
        for i in 1:length(filelist)
            res = vcat(res, CSV.read(directory*subdir*filelist[i], DataFrame))
        end
    end
    
    # filter!(t -> t[:k] > 0, res) #Remove dataset with only a bias term
    res[!,:method_cat] = map(method_category, res[:,:method])
    res[!,:X_setting] .= "real_X"
    res[!,:Y_setting] .= "real_Y"
    
    CSV.write(directory*"FINAL_results.csv", res)
end

Create `best` variant

In [36]:
res = CSV.read(prefix*"realy/"*"FINAL_results.csv", DataFrame)

filter!(t -> t[:dataset] ∉ pb_datasets, res)

res[!,:method] .= map(t -> (t == "Affine" ? "Adaptive LR - Affine" : t), res[:,:method]) 
res[!,:method] .= map(t -> (t == "Finite" ? "Adaptive LR - Finite" : t), res[:,:method]) 
res[!,:method] .= map(t -> (t == "Static" ? "Adaptive LR - Affine intercept only" : t), res[:,:method])

for method in ["Complete Features", "Imp-then-Reg 1", "Imp-then-Reg 2", "Imp-then-Reg 3", "Imp-then-Reg 4", "Imp-then-Reg 5", "Joint Imp-then-Reg", "Adaptive LR"]
    aux = filter(t -> startswith(t[:method], method), res)
    # @show size(aux)
    idcols = [:dataset, :SNR, :k, :kMissing, :splitnum]
    gd = groupby(aux, idcols)

    aux = similar(aux, 0)
    for subdf in gd 
        scoremax = argmax(subdf[:,:score])
        push!(aux, subdf[scoremax,names(aux)])
    end
    aux[!,:method] .= method*" - best"

    res = vcat(res, aux)
end

CSV.write(prefix*"realy/"*"FINAL_results.csv", res)

"aistats-rev/realy/FINAL_results.csv"

Sanity check

In [10]:
res = CSV.read(prefix*"realy/"*"FINAL_results.csv", DataFrame)
unique(filter( t-> t[:nrow] < 10, combine(groupby(res, [:dataset, :method]), nrow))[:,[:dataset, :nrow]])

Row,dataset,nrow
Unnamed: 0_level_1,String,Int64
1,mlmRev-star,6
2,pscl-politicalInformation,3


In [16]:
feasible_combinations = unique(res[:,[:dataset]])
feasible_combinations = crossjoin(feasible_combinations, DataFrame(splitnum=1:10))

pbdatasets = leftjoin(feasible_combinations, res, on=[:dataset, :splitnum])
gd = groupby(pbdatasets, [:dataset, :splitnum])
sort(combine(gd, nrow), :nrow)

# unique(res[:,[:dataset, :kMissing, :splitnum, :X_setting, :Y_setting]]), on=[:dataset, :kMissing, :splitnum, :X_setting, :Y_setting])
# pbdatasets = unique(pbdatasets[:,[:dataset, :X_setting, :Y_setting, :splitnum]])

# dataset_list = [d for d in readdir("../datasets/") if !startswith(d, ".")]
# sort!(dataset_list)

# pbdatasets[!,:array_num] .= (pbdatasets[:,:splitnum] .- 1) .* 71
# pbdatasets[!,:array_num] .+= map(t -> findfirst(t .== dataset_list)-1, pbdatasets[:,:dataset])

# pbdatasets[!,:back_dnum] .= map(t -> dataset_list[mod(t, 71) + 1], pbdatasets[:,:array_num])
# pbdatasets[!,:back_splitnum] .= map(t -> div(t, 71) + 1, pbdatasets[:,:array_num])

# @assert all(pbdatasets[:,:back_dnum] .== pbdatasets[:,:dataset])
# @assert all(pbdatasets[:,:back_splitnum] .== pbdatasets[:,:splitnum])

Row,dataset,splitnum,nrow
Unnamed: 0_level_1,String,Int64,Int64
1,COUNT-loomis,1,41
2,COUNT-loomis,2,41
3,COUNT-loomis,3,41
4,COUNT-loomis,4,41
5,COUNT-loomis,5,41
6,COUNT-loomis,6,41
7,COUNT-loomis,7,41
8,COUNT-loomis,8,41
9,COUNT-loomis,9,41
10,COUNT-loomis,10,41


In [12]:
dataset_list = [d for d in readdir("../datasets/") if !startswith(d, ".")]
sort!(dataset_list)

71-element Vector{String}:
 "COUNT-loomis"
 "Ecdat-MCAS"
 "Ecdat-Males"
 "Ecdat-Mofa"
 "Ecdat-RetSchool"
 "Ecdat-Schooling"
 "MASS-Cars93"
 "MASS-Pima.tr2"
 "MASS-survey"
 "Zelig-coalition2"
 ⋮
 "thyroid-disease-allhyper"
 "thyroid-disease-allhypo"
 "thyroid-disease-allrep"
 "thyroid-disease-dis"
 "thyroid-disease-sick"
 "thyroid-disease-sick-euthyroid"
 "thyroid-disease-thyroid-0387"
 "trains"
 "wiki4he"

In [13]:
unique(filter(t -> t[:nrow] < 10, combine(groupby(filter(t -> !startswith(t[:method], "Imp-then-Reg 4 - linear"), res), [:dataset, :method]), nrow))[:,:dataset])

2-element Vector{String}:
 "mlmRev-star"
 "pscl-politicalInformation"

In [14]:
unfinished_datasets = filter(t -> t[:nrow] < 10, 
    combine(
        groupby(    combine(groupby(filter(t -> !startswith(t[:method], "Imp-then-Reg 4 - linear"), res), [:dataset, :method]), nrow), 
                    [:dataset]), 
        :nrow => minimum => :nrow))[:,:dataset]
sort([findfirst(i .== dataset_list)-1 for i in unfinished_datasets])

2-element Vector{Int64}:
 43
 49

In [15]:
unfinished_datasets = filter(t -> t[:nrow] < 10, 
    combine(
        groupby(    combine(groupby(res, [:dataset, :method]), nrow), 
                    [:dataset]), 
        :nrow => minimum => :nrow))[:,:dataset]
sort([findfirst(i .== dataset_list)-1 for i in unfinished_datasets])

2-element Vector{Int64}:
 43
 49

In [21]:
unfinished_datasets = filter(t -> t[:nrow] < 10, 
    combine(
        groupby(    combine(groupby(filter(t -> !startswith(t[:method], "Imp-then-Reg"), res), [:dataset, :method]), nrow), 
                    [:dataset]), 
        :nrow => minimum => :nrow))[:,:dataset]
sort([findfirst(i .== dataset_list)-1 for i in unfinished_datasets])

Any[]

In [41]:
sort(union([71*i + (17) for i in 0:9], [71*i + 49 for i in 0:9]))

20-element Vector{Int64}:
  43
  49
 114
 120
 185
 191
 256
 262
 327
 333
 398
 404
 469
 475
 540
 546
 611
 617
 682
 688

In [40]:
array_num = 545
@show d_num = mod(array_num, 71) + 1
@show iter_do = div(array_num, 71) + 1

d_num = mod(array_num, 71) + 1 = 49
iter_do = div(array_num, 71) + 1 = 8


8

# For Synthetic-Data Experiments

In [30]:
prefix

"aistats-rev/"

In [31]:
setting = prefix*"synthetic_discrete/"
for y_model in ["linear", "nn"]
    for m_model = ["mar", "censoring"]
        dir = y_model*"_"*m_model*"/"
        directory = setting*dir

        # filelist = [f for f in readdir(directory*"itr_nn/") if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
        # res = similar(CSV.read(directory*"itr_nn/"*filelist[1], DataFrame),0)

        # for subdir = ["itr_nn/"]

        filelist = [f for f in readdir(directory*"all/") if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
        res = similar(CSV.read(directory*"all/"*filelist[1], DataFrame),0)

        for subdir = ["all/", "itr/", "itr_nn/"]
            filelist = [f for f in readdir(directory*subdir) if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
            # @show filelist

            for i in 1:length(filelist)
                aux = CSV.read(directory*subdir*filelist[i], DataFrame)
                try
                    res = vcat(res, aux)
                catch 
                    println("Error with ", directory*subdir*filelist[i])
                end
            end
        end

        res[!,:method] .= map(t -> replace(t, "Encoding as new category" => "Imp-then-Reg 4"), res[:,:method])
        res[!,:method] .= map(t -> replace(t, "Mode impute" => "Imp-then-Reg 5"), res[:,:method])

        # filter!(t -> t[:k] > 0, res) #Remove dataset with only a bias term
        res[!,:method_cat] = map(method_category, res[:,:method])
        res[!,:X_setting] .= "syn_X_"*m_model
        res[!,:Y_setting] .= "syn_Y_"*y_model

        for method in ["Imp-then-Reg 4", "Imp-then-Reg 5"]
            aux = filter(t -> startswith(t[:method], method), res)
            # @show nrow(aux)
            idcols = [:dataset, :X_setting, :Y_setting, :SNR, :k, :pMissing, :splitnum]
            gd = groupby(aux, idcols)

            aux = similar(aux, 0)
            for subdf in gd 
                scoremax = argmax(subdf[:,:score])
                # @show subdf[scoremax,names(aux)]
                push!(aux, subdf[scoremax,names(aux)])
            end
            aux[!,:method] .= method*" - best"

            res = vcat(res, aux)
        end

        CSV.write(directory*"FINAL_results.csv", res)   
    end
end

In [42]:
prefix

"aistats-rev/"

In [43]:
setting = prefix*"synthetic/"
files_with_issues = []
# for y_model in ["linear", "tree", "nn"]
for y_model in ["linear", "nn"]
    for m_model = ["mar", "censoring"]
        dir = y_model*"_"*m_model*"/"
        directory = setting*dir
        # filelist = [f for f in readdir(directory*"itr_nn/") if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
        
        # res = similar(CSV.read(directory*"itr_nn/"*filelist[1], DataFrame),0)
        # for subdir = ["itr_nn/"]

        filelist = [f for f in readdir(directory*"all/") if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]
        
        res = similar(CSV.read(directory*"all/"*filelist[1], DataFrame),0)
        for subdir = ["all/","itr_nn/"]
            filelist = [f for f in readdir(directory*subdir) if endswith(f, ".csv") && f ∉ ["all_results.csv","all_results_new.csv"]]

            for i in 1:length(filelist)
                try
                    aux = CSV.read(directory*subdir*filelist[i], DataFrame)
                    if any(aux[:,:pMissing] .> 0)
                        missingproba = unique(aux[aux[:,:pMissing] .> 0,:pMissing])[1]
                        aux[!,:pMissing] .= missingproba
                    end
                    res = vcat(res, aux)
                catch 
                    println("Error with ", directory*subdir*filelist[i])
                    push!(files_with_issues, directory*subdir*filelist[i])
                end
            end
        end

        res[!,:method_cat] = map(method_category, res[:,:method])
        res[!,:X_setting] .= "syn_X_"*m_model
        res[!,:Y_setting] .= "syn_Y_"*y_model

        for method in ["Oracle X", "Oracle XM", "Complete Features", "Imp-then-Reg 1", "Imp-then-Reg 2", "Imp-then-Reg 3", "Imp-then-Reg 4", "Imp-then-Reg 5", "Joint Imp-then-Reg", "Adaptive LR"]
            aux = filter(t -> startswith(t[:method], method), res)

            idcols = [:dataset, :X_setting, :Y_setting, :SNR, :k, :pMissing, :splitnum]
            gd = groupby(aux, idcols)

            aux = similar(aux, 0)
            for subdf in gd 
                scoremax = argmax(subdf[:,:score])
                push!(aux, subdf[scoremax,names(aux)])
            end
            aux[!,:method] .= method*" - best"

            res = vcat(res, aux)
        end
        
        CSV.write(directory*"FINAL_results.csv", res)   
    end
end

In [44]:
1+1

2

Sanity check

In [20]:
df = CSV.read(prefix*"synthetic/linear_mar/FINAL_results.csv", DataFrame) 
df[!,:setting] .= "1 - Lin-MAR"

aux = CSV.read(prefix*"synthetic/linear_censoring/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "2 - Lin-NMAR"
df = vcat(df, aux)

aux = CSV.read(prefix*"synthetic/nn_mar/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "5 - NN-MAR"
df = vcat(df, aux)

aux = CSV.read(prefix*"synthetic/nn_censoring/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "6 - NN-NMAR"
df = vcat(df, aux)

;

In [None]:
df = CSV.read(prefix*"synthetic_discrete/linear_mar/FINAL_results.csv", DataFrame) 
df[!,:setting] .= "1 - Lin-MAR"

aux = CSV.read(prefix*"synthetic_discrete/linear_censoring/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "2 - Lin-NMAR"
df = vcat(df, aux)
 
# aux = CSV.read(prefix*"synthetic_discrete/tree_mar/FINAL_results.csv", DataFrame) 
# aux[!,:setting] .= "3 - Tree-MAR"
# df = vcat(df, aux)

# aux = CSV.read(prefix*"synthetic_discrete/tree_censoring/FINAL_results.csv", DataFrame) 
# aux[!,:setting] .= "4 - Tree-NMAR"
# df = vcat(df, aux)

aux = CSV.read(prefix*"synthetic_discrete/nn_mar/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "5 - NN-MAR"
df = vcat(df, aux)

aux = CSV.read(prefix*"synthetic_discrete/nn_censoring/FINAL_results.csv", DataFrame) 
aux[!,:setting] .= "6 - NN-NMAR"
df = vcat(df, aux)

In [21]:
combine(groupby(df, [:dataset, :method, :setting]), nrow)

Row,dataset,method,setting,nrow
Unnamed: 0_level_1,String15,String31,String,Int64
1,n_1000_p_10,Imp-then-Reg 1 - nn,1 - Lin-MAR,80
2,n_1000_p_10,Imp-then-Reg 2 - nn,1 - Lin-MAR,80
3,n_1000_p_10,Imp-then-Reg 3 - nn,1 - Lin-MAR,80
4,n_1000_p_10,Imp-then-Reg 4 - nn,1 - Lin-MAR,80
5,n_100_p_10,Imp-then-Reg 1 - nn,1 - Lin-MAR,80
6,n_100_p_10,Imp-then-Reg 2 - nn,1 - Lin-MAR,80
7,n_100_p_10,Imp-then-Reg 3 - nn,1 - Lin-MAR,80
8,n_100_p_10,Imp-then-Reg 4 - nn,1 - Lin-MAR,80
9,n_120_p_10,Imp-then-Reg 1 - nn,1 - Lin-MAR,80
10,n_120_p_10,Imp-then-Reg 2 - nn,1 - Lin-MAR,80


In [22]:
unique(combine(groupby(df, [:dataset, :method, :setting]), nrow)[:,:nrow])

2-element Vector{Int64}:
 80
 70

In [25]:
df

Row,dataset,SNR,k,pMissing,splitnum,method,r2,osr2,r2list,osr2list,muvec,time,hp,score,method_cat,X_setting,Y_setting,setting
Unnamed: 0_level_1,String15,Int64,Int64,Float64,Int64,String31,Float64,Float64,String,String,String,Float64,String31,Float64,String15,String15,String15,String
1,n_1000_p_10,2,5,0.1,1,Imp-then-Reg 1 - nn,0.783002,0.751881,"Any[0.7665171843286452, 0.7893688177528002, 0.817890224476437, 0.8036376354991142, 0.8596366435831156, 0.6794559329720173, 0.8255995802632057, 0.7932532468734835, 0.6801241320648609, 0.7697311410179672, 0.7853428465486968, -0.14125156335652678, -2.6213999614988786, 0.7836464677029975, 0.48283759691039496, 0.6985706718146727, 0.8828181855263525, 0.613880113666227, 0.8529106628148347, 0.26800333636336826, 0.4312812175230756, 0.32808287495344757, 0.7954332113661159, 0.6461918753114773, 0.8413415048750846, 0.7482858639789494, 0.45331449723493056, -0.319303024917974, 0.9435875483957107, 0.609444513131788, 0.8375865206899261, -0.4003122840987927, 0.36382307143911496, -0.09835751708094853, 0.7673083877612625, 0.8710774660429738, 0.8497286891177294, 0.5302368964187063, 0.5509431808908263, 0.9357898844478688, 0.6173602852352, -Inf, 0.11734489013879679, 0.5492124607929425, 0.8774381044892472, 0.4961494381574233, 0.26043816106758044, 0.9318871916838743, 0.8340444739105703, 0.6950496067109759, 0.6996386744453672, -4.068040894228982, 0.7487812165423975, 0.9192184101213814, 0.9103397566045368, -Inf, 0.868464490317837, -Inf, -26.194196960340797, 0.9879315708713736, -Inf, -Inf, -Inf, -Inf, -Inf, 0.9562525904598034, -Inf, -Inf, -Inf, 0.36146436668297743, -Inf, -Inf, -Inf, -Inf, 0.03581616526594078, -Inf, -Inf, -6.706069592621754, 0.8791090182581316, -Inf, -Inf, -7.353232277921936, -Inf, -Inf, -Inf, -Inf, 0.8766534149488281, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, 0.11915598568978736, -Inf, 0.7329947531807602]","Any[0.7533434727434916, 0.8037345427780918, 0.801091541948193, 0.8007502185610421, 0.7593401249053844, 0.7953275600860703, 0.7104046230222195, 0.7896755196027223, 0.7976046380948352, 0.7348855590308163, 0.7843037337786878, 0.7762867554262245, 0.2606144580701999, 0.8207606392407211, 0.732780895226183, 0.8076320574830487, 0.9073364799484116, 0.6911663494068845, 0.6719979652951384, 0.8859652509898766, 0.7328795072402021, 0.7980052642660229, 0.8934376901623409, 0.802096588017864, 0.8701239974730035, 0.8478849214126659, 0.583579958875988, 0.9352982485171242, 0.7364575044174088, 0.7263647011033944, 0.7243856002901728, 0.8471031669051663, 0.9088612467233923, 0.8229505915793724, 0.8937747827013545, 0.8133201220031822, 0.8659186494466297, 0.6486402106480877, 0.6397369309060714, 0.7240070603104105, 0.8886599489996887, 0.8432553668484993, 0.6813032572836998, 0.7443056106764531, 0.7315750611175126, 0.48032726104171986, 0.7905149315000028, 0.6810310992022784, 0.9176607783529119, 0.8507442460822956, 0.7665969506205166, 0.8769600648907918, 0.8433764792654727, 0.6148707254401669, 0.9069935448001605, 0.7167954529733869, 0.7420524532711759, 0.9567343061692194, 0.505121207810054, 0.81418855777992, 0.8728106686162509, 0.6903808182616198, 0.9759403767751433, 0.9898534388495667, 0.320505167926451, 0.974180438883839, 0.27748718852662624, 0.9583687821634513, 0.9248942225551539, 0.8922518436649951, 0.9905967953608596, 0.9614222082125097, 0.5592893556705427, 0.9333689186420774, NaN, 0.9871552174054466, 0.46362290119727223, -55.52448204205184, 0.9675299011821044, 0.9607117609873352, 0.9523900151543707, -405.12377523098894, 0.8132824812298787, 0.34888789864703895, 0.6052839845803966, 0.8796903402451017, NaN, -0.6092324821737163, 0.9969813443984485, -0.9926120429373997, 0.9212446959641847, 0.9919953863878542, 0.9954529716678489, 0.9994265414934197, NaN, -2.260342809991455, 0.7307303641950899]",Any[],104.997,Dict(:hidden_nodes => 5),0.771493,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR
2,n_1000_p_10,2,5,0.1,1,Imp-then-Reg 2 - nn,0.782897,0.754546,"Any[0.7695939310834479, 0.7958369386061889, 0.827814557522069, 0.8124808499360384, 0.8544290933216263, 0.708769247897624, 0.8238413029996138, 0.7783586825160131, 0.6369868680916724, 0.7647952856299172, 0.8085629997570332, -0.020778675987531914, -1.1891065228864166, 0.7077365926033982, 0.5602674445804408, 0.6947784948622318, 0.9206853581879901, 0.5987470137226976, 0.8559958934138292, 0.20086105771632234, 0.5379175403254608, 0.35448048765976137, 0.7723925617106107, 0.5157676799946905, 0.850736088355077, 0.7392890624144961, 0.3114193461440736, 0.07373379100688571, 0.8511785024911431, 0.6488826511439205, 0.8177883067963392, -0.25654814668254233, 0.34142180252845533, 0.056894729858257054, 0.5508596989375744, 0.84582371908937, 0.8494927525957112, 0.3263251995978159, 0.4632315424425151, 0.9690389600841008, 0.515034430047064, -Inf, 0.42540663415138713, 0.5093301265142569, 0.9148947701563377, 0.5567094968973754, 0.16301179146936562, 0.8959761991549539, 0.7990286955663712, 0.7216664024505351, 0.6468482688433377, -6.087204592110704, 0.7333693038751192, 0.927830799039229, 0.8683075817987834, -Inf, 0.8668733779427975, -Inf, -23.76296159081411, -1.721503800261969, -Inf, -Inf, -Inf, -Inf, -Inf, 0.8772305471331727, -Inf, -Inf, -Inf, 0.06111365529466117, -Inf, -Inf, -Inf, -Inf, 0.018392097098101856, -Inf, -Inf, -7.9328802402687195, 0.9271954130310256, -Inf, -Inf, -6.748401914438455, -Inf, -Inf, -Inf, -Inf, 0.8072834374699871, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -15.853480246842107, -Inf, 0.7497594111657485]","Any[0.7521681012378905, 0.8126629783176237, 0.8048579471688717, 0.7999280740232624, 0.766688318602418, 0.7819688816916097, 0.6973782155005628, 0.7877361223969955, 0.7931621161018239, 0.736278978012374, 0.794466800296958, 0.7682462934069024, 0.3787257666893511, 0.8302368497168655, 0.7569165192018001, 0.8429676075660077, 0.9139018779528438, 0.6882268256552164, 0.6515020829261398, 0.8794748282369214, 0.7242776045123211, 0.7716087021858555, 0.865272396219245, 0.8271648613269447, 0.899149753564218, 0.8468126028626084, 0.6092986002179042, 0.9314684786165909, 0.7532446130601375, 0.7280467323446524, 0.7054619961027493, 0.8516071832983716, 0.9151337023989542, 0.8353819694666139, 0.9102310048590457, 0.8284675179383485, 0.8523934501961714, 0.6423099559384536, 0.6219547553711251, 0.7014072000298008, 0.8829970501739517, 0.8302512754085627, 0.683425618560443, 0.7465933776529302, 0.7163030267243915, 0.44694638070215176, 0.7883999024134154, 0.6120973843215418, 0.920660784707393, 0.8656948037519019, 0.7494581446085709, 0.8933590858151238, 0.8483755433781045, 0.5853930026019574, 0.8838589883307574, 0.68879496128795, 0.7394542427916451, 0.9300854049930224, 0.6185309342561682, -1.1373707481623918, 0.8541197261372468, 0.6778341636982071, 0.9756338696099099, 0.9867927284578659, 0.5271235281509936, 0.9920038490423388, -0.015203931995264597, 0.9751212254760645, 0.9315804816226486, 0.9158294364945033, 0.974781382325438, 0.9709774334614083, 0.30325395332864613, 0.9310115452356211, NaN, 0.9979333037612477, 0.6859695056002493, -20.680287100495082, 0.9741089615354155, 0.9717593215036416, 0.9685753599425421, -374.8476950104407, 0.8885675989307297, 0.49049286508494805, 0.7848834830720819, 0.9306427873063838, NaN, 0.7330043627034505, 0.9578457087882754, -1.004188217074487, 0.8627905388746467, 0.9895135238061091, 0.9970494735169647, 0.9980918477133427, NaN, -6.629448684013537, 0.7476450565391904]",Any[],34.9523,Dict(:hidden_nodes => 5),0.76351,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR
3,n_1000_p_10,2,5,0.1,1,Imp-then-Reg 3 - nn,0.782897,0.750016,"Any[0.7695939416516333, 0.7958369526866782, 0.8278145640625207, 0.8124808354659239, 0.8544290895660229, 0.7087692589112391, 0.8238413012544599, 0.7783586773569674, 0.6369868661215787, 0.7647952533283091, 0.8085630135075417, -0.020778445395478906, -1.1891055660540832, 0.7077366887606142, 0.5602674265551975, 0.6947784541850772, 0.9206853474053175, 0.5987469299336079, 0.8559958954823075, 0.20086105771632234, 0.5379175108047054, 0.35448059121376707, 0.7723925493486936, 0.5157677052471549, 0.8507360858830431, 0.7392890605081506, 0.31141934758392387, 0.07373389278720888, 0.8511784864352053, 0.6488826008166027, 0.8177882933551301, -0.2565481947916959, 0.3414217165319471, 0.056894587077069514, 0.5508597822858282, 0.8458237233418163, 0.8494928872877417, 0.3263251995978159, 0.46323148241814227, 0.9690389662200491, 0.5150344884057507, -Inf, 0.425406642594943, 0.5093302383580417, 0.9148947886612724, 0.5567094967881682, 0.16301185973168153, 0.8959761901680168, 0.7990287436897183, 0.7216664008860303, 0.6468482382832296, -6.087205213107657, 0.7333693038751192, 0.9278307949426374, 0.8683074677076876, -Inf, 0.8668734410655881, -Inf, -23.7629621926325, -1.721503800261969, -Inf, -Inf, -Inf, -Inf, -Inf, 0.8772305139950777, -Inf, -Inf, -Inf, 0.06111365529466117, -Inf, -Inf, -Inf, -Inf, 0.018392048928594362, -Inf, -Inf, -7.93287757095951, 0.9271954225944553, -Inf, -Inf, -6.748403023859631, -Inf, -Inf, -Inf, -Inf, 0.807283292956117, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -15.853452472466433, -Inf, 0.7497594053484748]","Any[0.7521680994299383, 0.8123904300766639, 0.8040011960516338, 0.7996519155370673, 0.7663001016596891, 0.7809210530276217, 0.6980563026586246, 0.7874439666962774, 0.7927511384809067, 0.7370440791802451, 0.794628731186694, 0.7699219261032517, 0.16706963083381943, 0.8141231087693638, 0.7612707046169328, 0.8311941870887949, 0.914301819808216, 0.6908136751963966, 0.6537614891483523, 0.8785114620171054, 0.7269152651934047, 0.7712527645110495, 0.877733591849323, 0.8276294887122486, 0.8999718037648458, 0.8468195786481199, 0.6074067611418328, 0.9322488949966774, 0.754446717967485, 0.7312826382207773, 0.7111491448045101, 0.852810689399898, 0.9132013173996878, 0.8363869799858668, 0.9091935092637233, 0.828961186686889, 0.8527340817203795, 0.6416626303893624, 0.6261747580149295, 0.704120695434774, 0.8830364474658359, 0.8294420970853376, 0.6799462915920408, 0.7534720251163912, 0.713352835159247, 0.44966577696424526, 0.7896330422686428, 0.6140944845761758, 0.9212209841987675, 0.8633302442049511, 0.7497760348089509, 0.8924186187827025, 0.8485477167162067, 0.5804838229821985, 0.8842525059877345, 0.6957096932155452, 0.7363884172029354, 0.9149955869296442, 0.612933902828444, 0.7748957682070254, 0.8595757966489567, 0.6743709763906585, 0.979161516232696, 0.99509027668682, 0.5005095765410219, 0.9918870721184193, 0.18055269828967968, 0.9682834499185693, 0.9549620167636645, 0.913217039923502, 0.9763025003933516, 0.9757595754140497, 0.3024160962257302, 0.9337352350633826, NaN, 0.9987335790201997, 0.9100053774677388, -5.516860535006561, 0.9712494551059435, 0.9720198253594454, 0.9633858255218835, -358.1248289462613, 0.8703071147390662, 0.3586674004396836, 0.6881919417777069, 0.9156974747625289, NaN, 0.6774191059032785, 0.9855516006418766, -2.317012115408258, 0.8656088890110161, 0.9910379737540359, 0.9969193152257196, 0.9985438195031417, NaN, -3.031179313696021, 0.7340433820173857]",Any[],33.3299,Dict(:hidden_nodes => 5),0.76351,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR
4,n_1000_p_10,2,5,0.1,1,Imp-then-Reg 4 - nn,0.733965,0.706336,"Any[0.7605807795236998, 0.7634209003276502, 0.795501981440903, 0.7051564685946253, 0.8878488518144565, 0.5822187780675685, 0.8417237314952828, 0.7630200470329874, 0.6673854962768224, 0.4440774842623657, 0.7742502105000912, -0.46028494391109986, -0.33885831084742235, 0.92828063095226, -0.3072833113663609, 0.5771419162614879, 0.8713327465371642, 0.5343843075644699, 0.9119651911613658, -0.08327089145391708, -0.20240886795371504, -0.19932833891369284, 0.7042731570114258, 0.957392421471461, 0.6891324453452699, 0.7280057730396803, 0.5257341665713793, -0.5559607104636988, 0.8922482737599786, 0.6051483910911541, 0.5448365364306574, -0.9110944118830671, -0.37443335936621946, -0.15538692654191455, 0.7583465940782543, 0.804515480578048, 0.7536199940432141, -0.6323949520973826, 0.47954367267217324, 0.5672789173147061, 0.1895516577910329, -Inf, 0.47086379450749727, 0.6797708037941008, 0.9452380994442241, 0.3067173792400051, 0.2962643879309851, 0.818379557947117, 0.5627095188067072, 0.6257184251899214, 0.6035497142080817, -16.428507670505187, 0.5777249137394478, 0.9264624606954023, 0.9111984124711572, -Inf, 0.8373958647834616, -Inf, -2.033105485790371, 0.7203423679908647, -Inf, -Inf, -Inf, -Inf, -Inf, 0.8103118890562873, -Inf, -Inf, -Inf, -3.0823651591877175, -Inf, -Inf, -Inf, -Inf, 0.33400441807047676, -Inf, -Inf, -5.173059018520357, 0.5876802472623965, -Inf, -Inf, -6.450071369631795, -Inf, -Inf, -Inf, -Inf, 0.4688024139614889, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -47.82713250515329, -Inf, 0.6140961671656445]","Any[0.7448310331115977, 0.795218173605973, 0.7583328022946928, 0.7585617660335339, 0.7594753406220568, 0.7690065518684406, 0.6705681273761129, 0.7760313539470453, 0.7943118541555901, 0.33594438803391435, 0.7595136342465623, 0.7592170392602272, 0.3261855733353197, 0.7570865230253282, 0.5166904732074097, 0.7302563713615071, 0.8977349509103848, 0.5515024401196975, 0.6445669260908651, 0.8922342296577563, 0.6386180643793228, 0.7968715029994429, 0.8571030305764274, 0.7746664091765094, 0.8464330928633592, 0.5219095265569931, 0.48462523487076214, 0.8248020359977007, 0.6726583554428207, 0.3458246547318724, 0.42220064128176293, 0.7976647137930173, 0.8895207878245436, 0.7261748436241923, 0.47893340088919956, 0.8126988344846575, 0.8436566046095951, 0.6530754124473082, 0.5856358954438008, 0.5267347887858054, 0.8865490427568574, 0.768092696890196, 0.6799575419133561, 0.632361693393271, 0.7456056433184635, 0.4235032766050755, 0.8217402920737599, 0.70174820984087, 0.6204044642180758, 0.8689517655396923, 0.7940597973053841, 0.8251637575568261, 0.7523013416897135, 0.6103225138854386, 0.8717593797245745, 0.75537993724204, 0.7750344283185737, 0.9567490543650417, -0.16281504811226455, 0.1616027915138355, 0.7666918365771874, 0.3983821242255987, 0.9660340293143368, 0.9379563599834282, -0.3733776550270793, 0.8691827527973193, -0.12294298332071385, 0.9833680817005382, 0.8986760125832389, 0.9209952057171302, 0.9688538237554045, 0.9657577043393641, 0.7722997824925335, 0.8365740106204616, NaN, 0.9991383742748394, 0.42965039137570604, -61.80995082094497, 0.9099899658628816, 0.93649299919285, 0.878865824403205, -275.6345813785419, 0.8924228703224191, -2.8994498156777255, 0.9159452514653921, 0.8959539614185411, NaN, 0.7785091742249702, 0.05021736924923692, -0.05342967389480946, 0.8664153970234746, 0.8013680549445384, 0.9867243213952334, 0.9999624824563382, NaN, -9.762324970613118, 0.7105725900517874]","Any[-0.42866392298366984, 1.0251187628444092, -1.0368463398831775, -0.3417904042252529, -0.5864453739417947, 1.018953445500854, -0.33199172783580433, -1.2494065855290715, -0.4943958222281771, 0.047070671165473775]",44.2912,Dict(:hidden_nodes => 5),0.71201,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR
5,n_1000_p_10,2,5,0.1,10,Imp-then-Reg 1 - nn,0.427568,0.389282,"Any[0.4366348799930332, 0.5436226762274985, 0.19010290077451863, 0.3306631463447812, 0.38777194494981504, 0.47449666159102566, 0.41465205929207993, 0.5202751752177699, 0.40831741266961974, 0.05541533478359428, 0.3975317072611525, -3.02456976577825, -3.4809131328985217, 0.11539845681913641, 0.6070047852054365, -0.5022048449780623, 0.5061178911995885, 0.627589488753527, 0.22845077621941223, 0.4666630043977038, 0.549158952582053, 0.11516343633096804, 0.7848695255704033, -1.9977681536783165, -9.904231687047618, -0.6593105090736526, -1.3540323526517755, 0.5266530160118904, 0.714333313696264, -0.35285416878042364, -1.0404704801791564, -0.1732418669296476, -0.6827258518373127, 0.07543580019907481, 0.0866657554054232, -Inf, 0.4199852727563762, -0.4390575071937812, -Inf, 0.5774691587095182, -0.13789421175044203, 0.6068254787526659, -2.3508761453064957, 0.21245261208683408, 0.6571429467272203, -0.5549998773115323, -0.36668019873888147, -Inf, -Inf, -Inf, 0.048987369534396974, -Inf, -Inf, -Inf, -Inf, 0.6673218607971678, 0.725913989287794, -Inf, -Inf, -1.0725656202637452, -Inf, 0.7748275033262362, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -37.885009713167214, -Inf, -Inf, -Inf, -Inf, -Inf, 0.36459501003854267]","Any[0.3886685005868725, 0.4174667627562043, 0.43918984618745993, 0.3924778633433835, 0.4001224126380658, 0.3911103689017056, 0.3841026227398574, 0.4831364686348192, 0.45205733753676547, 0.4032158305327277, 0.3233426364916955, 0.6412147186675565, 0.6174688021401982, 0.4841937905151057, 0.38447913181360316, 0.3659951752789843, 0.400039295808166, 0.44187280505728, 0.06865854485107581, 0.1956425507306191, 0.6015482598613835, 0.3999884868995488, 0.41772932307567545, 0.24066797428013476, 0.6404577213146443, 0.3444720068406384, 0.5144577627564635, 0.6131348852972565, 0.5436255908232925, 0.3356544186294922, 0.6139907991012576, 0.719681489788158, 0.4254785407502196, 0.19899841410100294, 0.30367623861171544, 0.8157657865895847, 0.34364054382541664, 0.356627323637393, 0.6961129731223914, 0.6964764255507263, 0.39757064187662006, 0.64200463069248, 0.5839775725379359, 0.6924838261125585, 0.5719488715433722, 0.3888760057992061, 0.591865135708423, -0.04613978766252491, 0.6629354629671254, 0.44941089947426904, 0.2400691521349304, 0.8203644557179823, 0.6729617069489706, 0.9147491767151816, 0.9458451160713419, -0.41756297310371826, 0.04180872877437147, -0.13369005628410946, 0.4238396800263651, 0.10271466542336283, 0.8340164224874467, -0.08006171582451582, 0.42498823300596233, 0.8913131064694804, -1.2437356454433406, -1.205464635694684, 0.4996487222648547, 0.891888948541025, 0.17982993098075994, 0.9965201262915007, 0.5761334211442724, 0.7347430290547059, -2.646180198788289, -102.63152078230799, 0.9288126207835536, 0.9886572904280915, -1.4473400117715105, 0.997737103052332, 0.440658814608907]",Any[],46.8637,Dict(:hidden_nodes => 5),0.381919,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR
6,n_1000_p_10,2,5,0.1,10,Imp-then-Reg 2 - nn,0.427881,0.399551,"Any[0.4252096480074009, 0.5829479304476874, 0.07376517606301647, 0.4296458405411727, 0.3146113850342459, 0.48052158755121344, 0.3859225561699624, 0.5575160281812015, 0.3427645036093403, -0.03049340253404953, 0.334877561482506, -2.509518235640218, -4.761457912648442, -0.12378759682922569, 0.6011270346256892, -0.5689813124736434, 0.5600853378318729, 0.6776760886844242, 0.1980978888591256, 0.6152669737897942, 0.5741654314468628, 0.21275361716713126, 0.7570733610515186, -2.070786506489417, -15.208455469407248, -0.9447762083909819, -0.24369520541348177, 0.4892884045824142, 0.7437195011748486, -0.3625325699328008, -0.6730071960582023, -0.12504455012164062, -0.23095235578229878, -0.08546421004788374, 0.04466460660908145, -Inf, 0.40657199277149425, -0.44357516487390525, -Inf, 0.6911101281568719, -0.28770857957813134, -0.010624551724792486, -2.742902463418436, 0.2780636392981688, 0.6464381695087156, -0.8371501169475124, -0.20939604139812085, -Inf, -Inf, -Inf, 0.3945793218250304, -Inf, -Inf, -Inf, -Inf, 0.8639736231050297, 0.5218675521890763, -Inf, -Inf, -0.8218640996994864, -Inf, 0.8040932022045952, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -39.22592110149481, -Inf, -Inf, -Inf, -Inf, -Inf, 0.49954762943086595]","Any[0.40570495574129783, 0.42362851000091095, 0.44380068164692943, 0.4076537229660564, 0.4407214030575516, 0.4143679932592026, 0.4038748739628887, 0.5033139693450794, 0.4502954548460324, 0.3866387632421956, 0.31538200830074836, 0.6451705085150572, 0.5056596791547128, 0.482915770138708, 0.35761434458054187, 0.34963333686929654, 0.3379441884714427, 0.44309446665591146, 0.15807873802432282, 0.16752587959708165, 0.624132088437471, 0.3608124285426838, 0.3996170996399472, 0.31894599543455426, 0.5886477985909035, 0.290490557850959, 0.5234467017615672, 0.6558078409317141, 0.5791758274984469, 0.39979361759665655, 0.6419063548159104, 0.7555156052749619, 0.5100434857260072, 0.276290876204472, 0.4398170634510997, 0.7970450169666181, 0.29887358650733975, 0.42839196565132365, 0.7197065872751095, 0.6689700713539305, 0.4576453540728197, 0.6740243735822611, 0.5366264890205361, 0.7240916969044213, 0.5290752509650987, 0.3933168053184064, 0.5550067856529652, -0.025489693534216817, 0.670080363471315, 0.5023277228980618, -0.0026208471829554103, 0.7455010110109045, 0.6480212180245126, 0.9201413875603472, 0.8777413847128028, -7.004574876031448, 0.5721947088146482, -0.39490744093377184, 0.260496428735436, -0.6525977168720118, 0.8158166785339495, -0.41226232164086696, 0.3719183201339611, 0.8815539976057037, -1.1993600498746524, -16.9303665891718, 0.42033371651205953, 0.9071824708959136, -0.8171729293141521, 0.9989966105014764, 0.667363305980778, 0.761407043945851, -2.5640932817718776, -61.202985483950606, 0.9190176141904917, 0.2918054279240655, -7.033636189818347, 0.9867005802034079, 0.42508472861072333]",Any[],58.512,Dict(:hidden_nodes => 5),0.392127,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR
7,n_1000_p_10,2,5,0.1,10,Imp-then-Reg 3 - nn,0.427881,0.399448,"Any[0.4252096647763447, 0.582947912398432, 0.07376515027429675, 0.4296458391748972, 0.3146113672897084, 0.4805216443120398, 0.3859225401490527, 0.5575160295912418, 0.34276450489340937, -0.030493413864385976, 0.3348776442337337, -2.5095174054562213, -4.761459586704755, -0.12378743830834193, 0.6011270649986382, -0.568981267766516, 0.5600853953338678, 0.6776760818242833, 0.19809759171100239, 0.6152669333629466, 0.5741653805384368, 0.2127535885216988, 0.7570734204519126, -2.070786008625238, -15.208455195785191, -0.9447764280252771, -0.24369520541348177, 0.4892883843750636, 0.7437194392067887, -0.3625326699225855, -0.6730072908583418, -0.12504452866974836, -0.23095235578229878, -0.08546421004788374, 0.04466454105819173, -Inf, 0.40657200363953383, -0.44357492085588723, -Inf, 0.6911101777806332, -0.28770921647916614, -0.010624208703149618, -2.7429022573970863, 0.27806371250309114, 0.6464381901536351, -0.8371500237095872, -0.20939605099736247, -Inf, -Inf, -Inf, 0.39457921703906995, -Inf, -Inf, -Inf, -Inf, 0.8639736435822897, 0.5218677386591806, -Inf, -Inf, -0.8218640996994864, -Inf, 0.8040929188407949, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -39.22591528261121, -Inf, -Inf, -Inf, -Inf, -Inf, 0.4995476355108115]","Any[0.40570495500412873, 0.4248943719667838, 0.4438450883520949, 0.4076885197120439, 0.43705071790736616, 0.4128271357703309, 0.3990443201493289, 0.5035857922251508, 0.4504521651915314, 0.387802577678781, 0.3178778243454907, 0.6461879526791754, 0.5071141515954812, 0.4851360438318276, 0.3540915261379083, 0.3402886267434493, 0.3339688121763963, 0.44262393216182705, 0.15354331634132612, 0.1672885262138919, 0.6245048488076427, 0.3741136484125215, 0.3968107086975966, 0.3163227495030948, 0.5703464473024893, 0.2825717601509258, 0.5264244357605299, 0.6543322771608306, 0.5317695395154949, 0.3930700488416665, 0.6419284693690743, 0.7551647491096799, 0.50926451036932, 0.22091056400611941, 0.4421064800726664, 0.7972690222888987, 0.3044908310893829, 0.43111459296545174, 0.7126270462133109, 0.673898174242546, 0.4274540234719175, 0.6723107153568608, 0.5461614253252586, 0.7419796230813425, 0.5351550619862939, 0.39123918321548834, 0.5584837722809811, -0.0037233438765333915, 0.658509120735326, 0.5621368519672294, 0.2016072752562752, 0.7569506129057672, 0.6454900461698536, 0.9198646460225226, 0.9123681637013938, -7.3932722341076325, 0.4166533523812377, -0.4052593749519522, 0.37383536387992733, -0.6532447870180236, 0.8159002650852678, -0.3819915261350062, 0.37221130151924686, 0.9211137064456683, -1.204609699480339, -4.663954192027965, 0.4051033448307656, 0.9066853682283815, 0.142831484813013, 0.9989227889320881, 0.6667263997465183, 0.7616746755607389, -5.587234531198056, -53.803362089735934, 0.9143914138985048, 0.6900343655690095, -8.270317450305058, 0.988520926117294, 0.34281491185138513]",Any[],55.9656,Dict(:hidden_nodes => 5),0.392127,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR
8,n_1000_p_10,2,5,0.1,10,Imp-then-Reg 4 - nn,0.420074,0.35315,"Any[0.4303511504413934, 0.5686686433638384, 0.33579075011963144, 0.39697051377992987, 0.3731021352178082, 0.39627953508969327, 0.3994176293795436, 0.5872182423298158, 0.39781080356368115, 0.17073250168065257, 0.19872754436482098, -2.397424193368689, -2.268843392426981, 0.004460703479162387, 0.565818812553273, -0.15924766704777382, 0.32998673763890773, 0.5199129614924107, 0.14626919493125934, 0.4359968717529237, 0.3862788184008218, 0.23725873278581733, 0.6287368574500016, -1.4599222260030862, -12.947302398673164, -0.8305768146663544, -0.4179737361661384, 0.5886884635069901, 0.7479779650875589, -0.35905518577392215, -1.1110211115488418, -0.14539397029884182, 0.9799065287870853, -0.4760285859526687, -0.16621827204755357, -Inf, 0.3694768354670762, -0.7231275435781279, -Inf, 0.2809882151570552, -0.4983073179312447, -0.019315607522374822, -1.6959542903349787, 0.3962188024183759, 0.5579496140878539, -1.149290802817438, -0.69979304641889, -Inf, -Inf, -Inf, -0.27970368808388124, -Inf, -Inf, -Inf, -Inf, 0.605747398339803, 0.8026151869122558, -Inf, -Inf, 0.03494098754101482, -Inf, 0.9254352573036184, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -108.17329593202665, -Inf, -Inf, -Inf, -Inf, -Inf, 0.2122324730435151]","Any[0.3685687298424143, 0.38323526543093045, 0.34068950430240386, 0.3634571992940603, 0.4022679794882331, 0.3373238765714358, 0.3988246930284609, 0.43068420995921397, 0.4033535942613362, 0.4175712004656449, 0.34076638332253906, 0.6387422210061031, 0.48519436048550313, 0.541370928809147, 0.33483769505538685, 0.45115956822114034, 0.37343992707463114, 0.43147495857300744, 0.03221400553747411, 0.109027523746285, 0.49072208465095146, 0.3435294433593551, 0.3388268064925841, 0.16154052045417866, 0.5560196758244456, 0.3910412944564947, 0.2613071131144983, 0.6046321831946262, 0.470704567335678, 0.29703415845595493, 0.6405341142760594, 0.6585460969959561, 0.3592521161203107, -0.03467983990380863, 0.3087912774393087, 0.8049153561337221, 0.30596152019565326, 0.5335505231784816, 0.6660722061179443, 0.6529478451045959, 0.5364309481627815, 0.5274605793209497, 0.4429701738170042, 0.6753881307733154, 0.5138610755311672, 0.31097864272479137, 0.6816938106074759, 0.057763912330316636, 0.6858650103627912, 0.317068419815528, 0.07479665778515321, 0.685279600461373, 0.741615465129814, 0.8666453817625264, 0.760503308478791, -0.4847406971855994, 0.8134513967524555, -0.22005993937870927, 0.2364923977545742, 0.7538061881858285, 0.7509168049801958, -0.07804494799768569, 0.3087872726844859, 0.9297231017123067, -1.6883551584142689, -2.649687463372799, 0.31293657811201026, 0.8681207045770818, -0.6973562379559348, 0.9404134098569568, 0.48911183516965595, 0.7594021270877005, -2.718818127955238, -14.314696426456484, 0.995427078672344, 0.9133736850042714, -13.142454452541521, 0.9965064539644239, 0.31297799402124005]","Any[-0.34248833572898024, 1.0687173728234751, -1.0235288149660053, -0.5132152319429715, 0.9727544156421395, -0.5042845886572083, -1.1591633154295267, -0.43771778641023035, 0.13789382406972084]",45.8,Dict(:hidden_nodes => 5),0.354542,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR
9,n_1000_p_10,2,5,0.1,2,Imp-then-Reg 1 - nn,0.796302,0.791629,"Any[0.8017022771201804, 0.8138346476642295, 0.8484447179939855, 0.7279801036716174, 0.8161972541949157, 0.7844597076789214, 0.6907898642275283, 0.8545467103092715, 0.8659452499641678, 0.7468641270117076, 0.740015047341041, 0.8806263817993235, 0.7922834319158887, 0.7834960181499493, 0.636501267892068, 0.5221423598937255, 0.74402478375584, -0.1702742086608109, 0.8665508478503321, -0.3844463202261359, 0.9108670824188131, 0.4801832911764604, 0.8057252010623004, 0.8968762473218592, 0.680610677712051, 0.569306682901328, 0.7100563692268782, 0.6617169882742691, 0.7256718693979365, 0.8273458975400354, -9.583437857626066, -2.155774017379486, 0.39553069869768687, 0.23456298783399887, 0.9577582705478375, 0.892359939298553, 0.8233715338206609, 0.6976533231833588, 0.7591736591247334, 0.4372419620241029, 0.3620676158320896, -0.5487021324466317, 0.6395642306186773, 0.9622128413883845, 0.9326603009803422, -Inf, -Inf, -Inf, -10.492258501330024, -Inf, -Inf, -259.5374422667785, -2.2047158048110207, -Inf, -Inf, -Inf, -Inf, -Inf, 0.7518112225245934, -Inf, -0.09423888560051141, -Inf, -15.631146322474212, -Inf, -Inf, -Inf, 0.4335400463823025, -Inf, -Inf, -Inf, -Inf, 0.8945646364260715, -175.48872334493308, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, 0.5191279935646332]","Any[0.7897578674774436, 0.8018618524838745, 0.7715053121256235, 0.7552516615495283, 0.7417430744223037, 0.840766911153825, 0.8236218176905774, 0.8007020978024952, 0.812201963426673, 0.8484970816886604, 0.815843641350874, 0.7829132304012867, 0.7714173425151214, 0.8363545597778772, 0.8285429147531354, 0.8269880515364978, 0.7446505111532624, 0.7205271982386132, 0.8580766893247153, 0.6899512237364254, 0.7288121505514309, 0.8848249472385096, 0.9045193392916986, 0.8358040222547982, 0.8377870198726878, 0.8531769416418944, 0.6335728878216235, 0.839339232932082, 0.6710739561272128, 0.7789337760701939, 0.8340389345467599, 0.8497711144058634, 0.9249011161912575, 0.8137901479759098, 0.8267205629974135, 0.826340921100131, 0.797529431040721, 0.9028465249985602, 0.835330424176285, 0.8570401695451637, 0.8797987605801455, 0.8445698043287346, 0.9488112107065276, 0.8319864702312051, 0.7782325856099612, 0.9259102549328351, 0.9534579500765723, 0.7269811834754916, 0.745950608181226, 0.8883139053743965, 0.8840076895982165, 0.8453301005931257, 0.8259801404320223, 0.8591005964955498, 0.17819574753320955, 0.9705335334239478, 0.23330071566281718, 0.0007046671641501856, 0.39565238622975507, 0.9945980921017756, 0.9625283986447464, 0.32236855974418144, 0.9772738088508249, -1.170715180050653, 0.895158252648469, 0.9369540184375449, 0.7626329494982629, 0.9997807177804464, 0.6843098605889824, 0.7947495968417742, 0.8694303564955288, 0.6951935545923273, -0.4119609986078152, 0.8310439654349921, 0.9801471569062574, -2.677241717570779, 0.992342801138933, 0.9937745467804336, 0.8069294663453, -93.89674429224952, 0.9463105886255487, 0.7538231560726422]",Any[],37.6152,Dict(:hidden_nodes => 5),0.782113,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR
10,n_1000_p_10,2,5,0.1,2,Imp-then-Reg 2 - nn,0.793859,0.786556,"Any[0.7994279411603342, 0.8082055408822351, 0.8448957678664777, 0.6999438453171385, 0.8169656247919546, 0.7858660407024767, 0.6737817366982672, 0.8578347867344018, 0.8641489333031086, 0.7405858653657741, 0.7851543268179229, 0.8680910031648345, 0.7833482682292734, 0.8106308782047077, 0.6277452593338941, 0.520287901852242, 0.7037689995455148, -0.3993260694523004, 0.8367888299342725, -0.2850082430462655, 0.9043879340814462, 0.5351593696030147, 0.7965716112234889, 0.7838664652074602, 0.6786856186573922, 0.6358014464047106, 0.6591487279496332, 0.6726644344418851, 0.6827841931197063, 0.8283210407999014, -7.589503478655379, -2.0880920892636534, 0.540467463002453, 0.29594008313666975, 0.9740155952611258, 0.8775336021056935, 0.8730395063876283, 0.8442130132138419, 0.794651277834529, 0.46088292729589464, 0.2945919419871186, 0.41967305302337543, 0.7450205959060751, 0.9732866257764914, 0.9628367343744092, -Inf, -Inf, -Inf, -9.083355536285557, -Inf, -Inf, -285.09270049368735, -0.21875075461289017, -Inf, -Inf, -Inf, -Inf, -Inf, 0.6769120625762295, -Inf, -0.08638257959278017, -Inf, -10.22103898861963, -Inf, -Inf, -Inf, 0.3930040020477319, -Inf, -Inf, -Inf, -Inf, 0.8885688965701214, -307.02924045670454, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, -Inf, 0.40057338817518395]","Any[0.7867960066137684, 0.8028890783113746, 0.7577012245391297, 0.7492497787999545, 0.7353342904328992, 0.8397421183271672, 0.8206535617082806, 0.8002682067235234, 0.809054480969488, 0.8436275903862587, 0.8108107366659636, 0.7759155113761208, 0.7657754303501978, 0.8222583994204353, 0.8086266631473356, 0.8417406019494781, 0.7320734854692106, 0.6591526967672057, 0.876941123529496, 0.6345168152570089, 0.7301141372577542, 0.8753847266124208, 0.9044383452919689, 0.7970154692999691, 0.8568495381114788, 0.8481209081915677, 0.622010623345796, 0.8210471067450026, 0.7168147233317315, 0.7660979331207562, 0.8318164275465161, 0.8564754205616983, 0.9256463843190702, 0.7985600777563293, 0.823519603642052, 0.8368339029954719, 0.7756135786273715, 0.8685960163619513, 0.8235509720751647, 0.8384215373680368, 0.8786280392692184, 0.857279148258958, 0.9517298934321945, 0.8404214408404004, 0.8212926633382763, 0.9314997041927269, 0.9681125996621087, 0.7682658963203274, 0.782517942241871, 0.9074107611913024, 0.8867281066362728, 0.8173779720211305, 0.8831624174071493, 0.8111389447204527, 0.41229467021143495, 0.9691098536803866, 0.16036695601819262, 0.09400634735435931, 0.21262638628633124, 0.9956436832261534, 0.9540854046062717, 0.24671159825435784, 0.978191567732168, -1.4845669617978023, 0.9816686618256507, 0.9815203006522207, 0.664024612295442, 0.9986108618452576, 0.7753143227176187, 0.8272652433453636, 0.8215728679401677, 0.698841509848171, -0.16913174358617122, 0.8095678740708316, 0.9816506420865242, -2.575302136431561, 0.93783285434999, 0.9936260421901012, 0.8572592584578097, -57.32514173161605, 0.9095457201543555, 0.7267825606932892]",Any[],49.6144,Dict(:hidden_nodes => 5),0.784006,Imp-then-Reg,syn_X_mar,syn_Y_linear,1 - Lin-MAR


In [28]:
unique(filter(t -> t[:dataset] == "n_20_p_10", df)[:,:pMissing])

7-element Vector{Float64}:
 0.1
 0.2
 0.3
 0.4
 0.5
 0.6
 0.7

In [24]:
filter( t-> t[:nrow] < 80, combine(groupby(df, [:dataset, :method, :setting]), nrow))

Row,dataset,method,setting,nrow
Unnamed: 0_level_1,String15,String31,String,Int64
1,n_20_p_10,Imp-then-Reg 1 - nn,1 - Lin-MAR,70
2,n_20_p_10,Imp-then-Reg 2 - nn,1 - Lin-MAR,70
3,n_20_p_10,Imp-then-Reg 3 - nn,1 - Lin-MAR,70
4,n_20_p_10,Imp-then-Reg 4 - nn,1 - Lin-MAR,70
5,n_20_p_10,Imp-then-Reg 1 - best,1 - Lin-MAR,70
6,n_20_p_10,Imp-then-Reg 2 - best,1 - Lin-MAR,70
7,n_20_p_10,Imp-then-Reg 3 - best,1 - Lin-MAR,70
8,n_20_p_10,Imp-then-Reg 4 - best,1 - Lin-MAR,70
9,n_20_p_10,Imp-then-Reg 1 - nn,2 - Lin-NMAR,70
10,n_20_p_10,Imp-then-Reg 2 - nn,2 - Lin-NMAR,70


In [None]:
pbdata = unique(filter( t -> t[:nrow] < 90, combine(groupby(df, [:dataset, :method, :setting]), nrow))[:,[:method, :setting, :dataset]])

In [None]:
aux = filter( t -> t[:dataset] ∈ pbdata && t[:nrow] < 10, combine(groupby(df, [:dataset, :method, :setting, :pMissing]), nrow))
unique(aux[:,[:dataset, :setting, :pMissing, :nrow]])

In [None]:
filter(t -> t[:pMissing] == 0.1, unique(aux[:,[:dataset, :setting, :pMissing, :nrow]]))

In [None]:
filter(t -> t[:pMissing] == 0.3, unique(aux[:,[:dataset, :setting, :pMissing, :nrow]]))

In [None]:
filter(t -> startswith(t[:setting], "5"), unique(aux[:,[:dataset, :setting, :pMissing, :nrow]]))