# Example notebook

In [None]:
using Pkg
Pkg.activate("..")

In [None]:
using Revise
using PHD

In [None]:
using Random, Statistics, CSV, DataFrames
using LinearAlgebra

In [None]:
# Names of the entries under ../datasets. `readdir` is used instead of
# shelling out to `ls`, so the cell does not depend on an external binary;
# dot-prefixed entries are skipped to mirror what plain `ls` would list.
dataset_list = [d for d in readdir("../datasets/") if !startswith(d, ".")]

In [None]:
# Empty table that collects one row per pushed result:
# (dataset name, copy number, iteration, method label, OSR2).
# Columns get concrete element types; bare `[]` would create Vector{Any}.
results_table = DataFrame(
    dataset=String[], copynum=Int[], iter=Int[], method=String[], osr2=Float64[]
)

In [None]:
# Passed as the `SNR` keyword to `PHD.linear_y` when Y is generated
# (presumably a signal-to-noise ratio — confirm against PHD).
SNR = 4

In [None]:
# Make sure the output directory exists. `mkpath` does nothing when the
# directory is already present, so no separate `isdir` check is required.
mkpath("../results")

In [None]:
# Dataset folder to load; `dataset_list[1]` is an alternative choice.
dname = "thyroid-disease-thyroid-0387"
# Copy number: selects the ../datasets/<dname>/<i>/ subfolder and is stored
# in the `copynum` column of results_table.
i = 1
# Iteration index stored in the `iter` column of results_table.
iter = 1

Read in a data file.

In [None]:
# Load copy `i` of dataset `dname`: X_missing.csv (empty strings and "NaN"
# are parsed as missing) and X_full.csv.
# `CSV.File` wrapped in `DataFrame` is used instead of `CSV.read(path)`:
# newer CSV.jl releases require a sink argument for `CSV.read`, while this
# form is accepted by both old and new releases.
X_missing = DataFrame(
    CSV.File("../datasets/$dname/$i/X_missing.csv", missingstrings=["", "NaN"])
)
X_full = DataFrame(CSV.File("../datasets/$dname/$i/X_full.csv"))
# Preview the first five rows.
first(X_full, 5)

Generate the output (response) variable Y from the fully observed data.

In [None]:
# Seed the global RNG before generating Y (presumably `linear_y` draws
# random numbers — confirm), then build Y from X_full and report the timing.
Random.seed!(2)
Y = @time PHD.linear_y(X_full; soft_threshold=0.1, SNR=SNR)

Add an offset column of ones (named `One`) to both data sets.

# Add a constant column named :One to both data frames (the offset term).
# NOTE(review): this snippet has no cell marker above it, and assigning a
# `Ref` with `=` to a new column may not be accepted by every DataFrames
# version — confirm that it runs and produces a column of 1.0 values.
X_full[:,:One] = Ref(1.)
X_missing[:,:One] = Ref(1.) ;

- Method 1: Impute missing data using MICE and regress using LASSO.

In [None]:
# Impute the missing entries of X_missing with PHD's MICE routine.
X_imputed = PHD.mice(X_missing);
# Preview the first five imputed rows.
first(X_imputed, 5)

In [None]:
# Fit a LASSO regression of Y on the (not yet standardized) imputed features.
linear = PHD.regress(Y, X_imputed; lasso=true, alpha=1.0)

In [None]:
# Score the fit; the two returned values are presumably in-sample and
# out-of-sample R² (confirm against PHD.evaluate). Print both.
R2, OSR2 = PHD.evaluate(Y, X_imputed, linear)
@show R2 OSR2

In [None]:
# Replace X_imputed with its standardized version; later cells use this one.
X_imputed = PHD.standardize(X_imputed);

In [None]:
# Refit the LASSO regression, now on the standardized imputed features.
linear = PHD.regress(Y, X_imputed; lasso=true, alpha=1.0)

In [None]:
# Score the refit model and print both metrics; OSR2 from this cell is the
# value recorded for Method 1.
R2, OSR2 = PHD.evaluate(Y, X_imputed, linear)
@show R2 OSR2

In [None]:
# Record Method 1's OSR2; the row order matches results_table's columns
# (dataset, copynum, iter, method, osr2).
push!(results_table, [dname, i, iter, "Impute then regress", OSR2])

- Method 2: Add indicator variables for missingness and impute zeros (finitely adaptive). Regress using LASSO.

In [None]:
# Build the augmented design: zero-imputed features side by side with the
# missingness indicators, then fit a LASSO regression on it.
X_augmented = hcat(
    PHD.zeroimpute(X_missing),
    PHD.indicatemissing(X_missing; removezerocols=true),
)
linear2 = PHD.regress(Y, X_augmented; lasso=true, alpha=0.8, missing_penalty=2.0)

In [None]:
# Score the Method 2 fit on the unstandardized augmented features.
R2, OSR2 = PHD.evaluate(Y, X_augmented, linear2)
@show R2 OSR2

In [None]:
# Standardize the augmented features and refit with the same settings.
X_augmented = PHD.standardize(X_augmented)
linear2 = PHD.regress(Y, X_augmented; lasso=true, alpha=0.8, missing_penalty=2.0)

In [None]:
# Score the refit Method 2 model.
# NOTE(review): unlike Methods 1 and 3, this OSR2 is never pushed to
# results_table — confirm whether that omission is intentional.
R2, OSR2 = PHD.evaluate(Y, X_augmented, linear2)
@show R2 OSR2

- Method 3: Affinely adaptive regression. Augment the features with `PHD.augmentaffine` and regress using LASSO.

In [None]:
# Build the affinely augmented features from X_missing and fit a LASSO
# regression on them.
X_affine = PHD.augmentaffine(X_missing; removezerocols=true)
linear3 = PHD.regress(Y, X_affine; lasso=true, alpha=0.8, missing_penalty=20.0)

In [None]:
# Score the Method 3 fit on the unstandardized affine features.
R2, OSR2 = PHD.evaluate(Y, X_affine, linear3)
@show R2 OSR2

In [None]:
# Standardize the affine features and refit.
# NOTE(review): missing_penalty is 90.0 here but 20.0 in the fit before
# standardization — presumably retuned on purpose; confirm.
X_affine = PHD.standardize(X_affine)
linear3 = PHD.regress(Y, X_affine; lasso=true, alpha=0.8, missing_penalty=90.0)

In [None]:
# Score the refit Method 3 model; OSR2 from this cell is the value recorded
# for Method 3.
R2, OSR2 = PHD.evaluate(Y, X_affine, linear3)
@show R2 OSR2

In [None]:
# Record Method 3's OSR2; the row order matches results_table's columns
# (dataset, copynum, iter, method, osr2).
push!(results_table, [dname, i, iter, "Augmented Affine", OSR2])

- Method 4: Finite adaptability with no zero-imputation

In [None]:
# Standardize the data that still contains missing values, then train the
# greedy model on it and report the training time.
# NOTE(review): the meaning of the positional arguments 8, 0.8 and 10 is
# defined by PHD.trainGreedyModel and is not visible here — TODO document.
X_missing_std = PHD.standardize(X_missing)
gm = @time PHD.trainGreedyModel(Y, X_missing_std, 8, 0.8, 10)

In [None]:
# Print a text rendering of the trained greedy model.
PHD.print_ascii(gm)

In [None]:
# Score the greedy model on the standardized data with missing values.
# NOTE(review): the result is only displayed, not stored or pushed to
# results_table — confirm whether Method 4 should be recorded too.
PHD.evaluate(Y, X_missing_std, gm)

In [None]:
# Uncomment to save the results table for this dataset copy to ../results:
# CSV.write("../results/"*dname*"_$i.csv", results_table)