# Example notebook

In [None]:
using Random, Statistics, CSV, DataFrames

In [None]:
include("../impute.jl")
include("../regress.jl")
include("../augment.jl")

In [None]:
# Collect the entries of ../datasets/; each entry is later used as a dataset name.
# `readdir` replaces shelling out to `ls`, which requires an external Unix utility.
# Dot-files are skipped because plain `ls` does not list them, and the explicit `sort`
# keeps the order deterministic on Julia versions where `readdir` does not sort.
dataset_list = [d for d in sort(readdir("../datasets/")) if !startswith(d, ".")]

In [None]:
# Empty results table; one row is pushed per evaluated method further down.
# Columns are created from untyped `[]` literals, so they hold values of any type.
results_table = DataFrame(
    dataset = [],
    copynum = [],
    iter = [],
    method = [],
    osr2 = [],
)

In [None]:
# Signal-to-noise ratio for the synthetic response: the noise added to Y below is
# rescaled so that norm(noise) == norm(Y) / SNR.
SNR = 4

In [None]:
# Make sure the output directory exists. `mkpath` does nothing when the directory is
# already present, so no separate `isdir` check (and no check-then-create gap) is needed.
mkpath("../results")

In [None]:
# Example configuration: the first dataset, with `i` selecting the
# "../datasets/<dname>/<i>/" subfolder read below (it is also stored in the `copynum`
# column of the results rows).
dname = dataset_list[1]
i = 1
# Stored in the `iter` column of the results rows.
iter = 1

Read in a data file.

In [None]:
# Load the data for dataset `dname`, subfolder `i`: one file with missing entries and one
# fully observed file. `DataFrame(CSV.File(path))` is used instead of the one-argument
# `CSV.read(path)` because newer CSV.jl releases require an explicit sink argument for
# `CSV.read`, while `CSV.File` works on both older and newer releases.
X_missing = DataFrame(CSV.File("../datasets/"*dname*"/$i/X_missing.csv"))
X_full = DataFrame(CSV.File("../datasets/"*dname*"/$i/X_full.csv"))
# Preview the first five rows of the fully observed data.
first(X_full, 5)

Create output

In [None]:
"""
    softthresholding(x; λ=0.1)

Apply the soft-thresholding operator to the scalar `x`: values above `λ` are reduced by
`λ`, values below `-λ` are increased by `λ`, and everything in between maps to zero.

The zero is returned in the promoted type of `x` and `λ`, so every branch yields the same
type. Broadcasting over a `Float64` array therefore produces a `Float64` array instead of
an abstractly typed one mixing `Int` and `Float64` elements.
"""
function softthresholding(x; λ=0.1)
    if x > λ
        return x - λ
    elseif x < -λ
        return x + λ
    else
        # Same type as the other two branches (e.g. 0.0 rather than the integer 0).
        return zero(promote_type(typeof(x), typeof(λ)))
    end
end

In [None]:
# Draw a ground-truth linear model: soft-thresholded Gaussian weights (small draws become
# exactly zero) and a random intercept held in a 1-element vector.
n,p = size(X_full)
wtrue = softthresholding.(randn(p))
btrue = rand(1)

# Zero the weight of the `Test` column so that it does not enter Y.
# Column names are compared as strings: depending on the DataFrames version `names`
# returns Symbols or Strings, and comparing Strings against `:Test` never matches.
test_index = findfirst(isequal("Test"), string.(names(X_full)))
test_index === nothing && error("X_full has no column named Test")
wtrue[test_index] = 0.

# Standardize every column; constant columns get σ = 1 to avoid dividing by zero.
μ = mean(Matrix{Float64}(X_full), dims=1)
σ = std(Matrix{Float64}(X_full), dims=1)
σ[σ .== 0] .= 1
X_normalize = (X_full .- μ) ./ σ

# Noiseless response of the linear model on the standardized features.
Y = Matrix{Float64}(X_normalize)*wtrue .+ btrue

In [None]:
# `norm` is provided by the LinearAlgebra standard library, which the `using` line at the
# top of this notebook does not load. Import it here so this cell does not depend on one
# of the included files having loaded it.
using LinearAlgebra: norm

# Add Gaussian noise to Y, rescaled so that norm(noise) == norm(Y) / SNR.
noise = randn(size(X_full,1))
noise .*= norm(Y)/norm(noise)/SNR
Y .+= noise

Add Offset

X_full[:,:One] = Ref(1.)
X_missing[:,:One] = Ref(1.) ;

- Method 1: Impute missing data using MICE and regress using LASSO.

In [None]:
# Fill in the missing entries of X_missing with `mice` (not defined in this notebook;
# presumably provided by the included ../impute.jl), then preview the first five rows.
X_imputed = mice(X_missing);
first(X_imputed, 5)

In [None]:
# Fit Y on the imputed features with the `lasso=true` option of `regress`
# (not defined in this notebook; presumably provided by the included ../regress.jl).
linear = regress(Y, X_imputed, lasso=true)

In [None]:
# Score the impute-then-regress model. `evaluate` returns two values, unpacked here as
# R2 and OSR2 (presumably in-sample and out-of-sample R² — confirm against its definition).
R2, OSR2 = evaluate(Y, X_imputed, linear)
@show R2 OSR2

In [None]:
# Append one results row in column order: dataset, copynum, iter, method, osr2.
push!(results_table, [dname, i, iter, "Impute then regress", OSR2])

- Method 2: Add indicator variables for missingness and impute zeros (finitely adaptive). Regress using LASSO.

In [None]:
# Build the augmented design by horizontally concatenating the outputs of `zeroimpute`
# and `indicatemissing` (project helpers not defined in this notebook), then fit with
# the `lasso=true` option of `regress`.
X_augmented = hcat(zeroimpute(X_missing), indicatemissing(X_missing))
linear2 = regress(Y, X_augmented, lasso=true)

In [None]:
# Score the augmented model; this overwrites the R2 and OSR2 values from the previous method.
R2, OSR2 = evaluate(Y, X_augmented, linear2)
@show R2 OSR2

In [None]:
# Append one results row in column order: dataset, copynum, iter, method, osr2.
push!(results_table, [dname, i, iter, "Augmented", OSR2])

- Method 3: Affinely adaptive

In [None]:
# Build the design for the affinely adaptive method with `augmentaffine` (not defined in
# this notebook; presumably provided by the included ../augment.jl), then fit with the
# `lasso=true` option of `regress`.
X_affine = augmentaffine(X_missing)
linear3 = regress(Y, X_affine, lasso=true)

In [None]:
# Score the affine model; this overwrites the R2 and OSR2 values from the previous method.
R2, OSR2 = evaluate(Y, X_affine, linear3)
@show R2 OSR2

In [None]:
# Append one results row in column order: dataset, copynum, iter, method, osr2.
push!(results_table, [dname, i, iter, "Augmented Affine", OSR2])

In [None]:
# Save the collected rows to ../results/<dname>_<i>.csv.
CSV.write("../results/$(dname)_$(i).csv", results_table)