In [1]:
using Dates
using ArgParse
using Printf
using Random

include("../btg.jl")


credible_interval (generic function with 3 methods)

In [1]:
using ArgParse

In [6]:
s = ArgParseSettings()

@add_arg_table! s begin
    "--opt1"
        help = "an option with an argument"
    "--opt2", "-o"
        help = "another option with an argument"
        arg_type = Int
        default = 0
    "--flag1"
        help = "an option without argument, i.e. a flag"
        action = :store_true
    "arg1"
        help = "a positional argument"
        required = true
end
parsed_args = parse_args(ARGS, s)

Dict{String,Any} with 4 entries:
  "flag1" => false
  "arg1"  => "/home/xz584/.local/share/jupyter/runtime/kernel-db366c4a-0ff7-4a7…
  "opt1"  => nothing
  "opt2"  => 0

In [None]:
include()

In [7]:
parsed_args["opt1"]

In [35]:
seed = 1234

1234

In [43]:
rng = MersenneTwister(seed)
ind_shuffle = randperm(rng, 10) 

10-element Array{Int64,1}:
  2
  1
  7
  9
  5
 10
  4
  8
  6
  3

In [2]:
# load abalone data
df = DataFrame(CSV.File("../datasets/abalone.csv"))
data = convert(Matrix, df[:,2:8]) #length, diameter, height, whole weight, shucked weight, viscera weight, shell weight
target = convert(Array, df[:, 9]) #age

# shuffle data
rng = MersenneTwister(1234)
ind_shuffle = randperm(rng, size(data, 1)) 
data = data[ind_shuffle, :]
target = target[ind_shuffle]
# training set
id_train = 1:200; posx = 1:7; posc = 1:7; n_train = length(id_train)
x = data[id_train, posx] 
Fx = data[id_train, posc] 
y = float(target[id_train])
ymax_train = maximum(y)
y ./= ymax_train
trainingData0 = trainingData(x, Fx, y) 
d = getDimension(trainingData0); n = getNumPts(trainingData0); p = getCovDimension(trainingData0)


7

In [3]:
myquadtype = ["QuasiMonteCarlo", "QuasiMonteCarlo"]
rangeλ = [-1.5 1.] 
rangeθs = [0.125 1000]
rangeθm = repeat(rangeθs, d, 1)
single = true
rangeθ = single ? rangeθs : rangeθm
# build btg model
btg0 = btg(trainingData0, rangeθ, rangeλ; quadtype = myquadtype)
(pdf0_raw, cdf0_raw, dpdf0_raw, quantInfo0_raw) = solve(btg0);

# Test

In [14]:
id_test = 1001:1100
n_test = length(id_test)
id_fail = []
id_nonproper = []
x_test = data[id_test, posx]
Fx_test = data[id_test, posc]
y_test_true = target[id_test]
count_test = 0
error_abs = 0.
error_sq = 0.
nlpd = 0.

0.0

In [None]:
for i in 1:n_test
    global error_abs, error_sq, nlpd, count_test
    before = Dates.now()
    mod(i, 10) == 0 ? (@info i) : nothing
    # @info "i" i
    x_test_i = reshape(x_test[i, :], 1, length(posx))
    Fx_test_i = reshape(Fx_test[i, :], 1, length(posc))
    try
        pdf_test_i, cdf_test_i, dpdf_test_i, quantbound_test_i, support_test_i = pre_process(x_test_i, Fx_test_i, pdf0_raw, cdf0_raw, dpdf0_raw, quantInfo0_raw)
        y_test_i_true = y_test_true[i]
        median_test_i = ymax_train * quantile(cdf_test_i, quantbound_test_i, support_test_i)[1]
        # @info "True, median " y_test_i_true, median_test_i
        try 
            CI_test_i = ymax_train .* credible_interval(cdf_test_i, quantbound_test_i, support_test_i; mode=:equal, wp=.95)[1]
            count_test += (y_test_i_true >= CI_test_i[1])&&(y_test_i_true <= CI_test_i[2]) ? 1 : 0
            # @info "CI" CI_test_i
        catch err
            append!(id_fail, i)
        end
        error_abs += abs(y_test_i_true - median_test_i)
        error_sq += (y_test_i_true - median_test_i)^2
        nlpd += log(pdf_test_i(y_test_i_true)) 
    # @info "Count, id_fail" count_test, id_fail
    catch err 
        append!(id_nonproper, i)
    end
    after = Dates.now()
    elapsedmin = round(((after - before) / Millisecond(1000))/60, digits=5)
    @info "Elapsed time:" elapsedmin
end

┌ Info: Elapsed time:
│   elapsedmin = 0.35127
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.34528
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.32573
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.31762
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.3304
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.31972
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.32567
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.32418
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.32608
└ @ Main In[15]:29
┌ Info: 10
└ @ Main In[15]:4
┌ Info: Elapsed time:
│   elapsedmin = 0.32512
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.33662
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.3277
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.33032
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.33017
└ @ Main In[15]:29
┌ Info: Elapsed time:
│   elapsedmin = 0.31813
└ 

In [16]:
count_test /= n_test - length(id_fail)
error_abs  /= n_test
error_sq   /= n_test
nlpd       /= -n_test

80.55491326322891

In [17]:
error_sq

6.6271409350295825

In [18]:
error_abs

1.837001061907537

In [19]:
nlpd

80.55491326322891

# Validation

In [None]:
function lootd(td::AbstractTrainingData, i::Int64)
        x = getPosition(td)
        Fx = getCovariates(td)
        z = getLabel(td)
        x_minus_i = x[[1:i-1;i+1:end], :]
        Fx_minus_i = Fx[[1:i-1;i+1:end], :]
        z_minus_i = z[[1:i-1;i+1:end]]
        x_i = x[i:i, :]
        Fx_i = Fx[i:i, :]
        z_i = z[i:i, :]
        return trainingData(x_minus_i, Fx_minus_i, z_minus_i), x_i, Fx_i, z_i
end