In [1]:
using Dates
using ArgParse
using Printf
using Random

include("../btg.jl")


credible_interval (generic function with 3 methods)

In [2]:
# load abalone data
df = DataFrame(CSV.File("../datasets/abalone.csv"))
data = convert(Matrix, df[:,2:8]) #length, diameter, height, whole weight, shucked weight, viscera weight, shell weight
target = convert(Array, df[:, 9]) #age

# shuffle data
ind_shuffle = randperm(MersenneTwister(1234), size(data, 1)) 
data = data[ind_shuffle, :]
target = target[ind_shuffle]
# training set
id_train = 1:200; posx = 1:7; posc = 1:3
x = data[id_train, posx] 
Fx = data[id_train, posc] 
y = float(target[id_train])
ymax_train = maximum(y)
y ./= ymax_train
trainingData0 = trainingData(x, Fx, y) #training data used for testing various functions
d = getDimension(trainingData0); n = getNumPts(trainingData0); p = getCovDimension(trainingData0)


3

In [3]:
#parameter setting
rangeθ = [10.0 1000]
rangeθm = repeat(rangeθ, d, 1)
rangeλ = [-1. 1.] #we will always used 1 range scale for lambda
myquadtype1 = ["SparseGrid", "Gaussian"]
myquadtype2 = ["QuasiMonteCarlo", "QuasiMonteCarlo"]
myquadtype = ["SparseCarlo", "SparseCarlo"]
myquadtype4 = ["Gaussian", "Gaussian"]
# fit btg model
btg0 = btg(trainingData0, rangeθm, rangeλ; quadtype = myquadtype)
(pdf0_raw, cdf0_raw, dpdf0_raw, quantInfo0_raw) = solve(btg0); #initialize training_buffer_dicts, solve once so can use fast techiques to extrapolate submatrix determinants, etc.


# Test

In [4]:
id_test = 1001:1030
n_test = length(id_test)
id_fail = []
x_test = data[id_test, posx]
Fx_test = data[id_test, posc]
y_test_true = target[id_test]
count_test = 0
error_abs = 0.
error_sq = 0.
nlpd = 0.

0.0

In [5]:
for i in 1:n_test
    global error_abs, error_sq, nlpd, count_test
    # mod(i, 10) == 0 ? (@info i) : nothing
    @info "i" i
    x_test_i = reshape(x_test[i, :], 1, length(posx))
    Fx_test_i = reshape(Fx_test[i, :], 1, length(posc))
    pdf_test_i, cdf_test_i, dpdf_test_i, quantbound_test_i, support_test_i = pre_process(x_test_i, Fx_test_i, pdf0_raw, cdf0_raw, dpdf0_raw, quantInfo0_raw)
    y_test_i_true = y_test_true[i]
    median_test_i = ymax_train * quantile(cdf_test_i, quantbound_test_i, support_test_i)[1]
    @info "True, median " y_test_i_true, median_test_i
    try 
        CI_test_i = ymax_train .* credible_interval(cdf_test_i, quantbound_test_i, support_test_i; mode=:equal, wp=.95)[1]
        count_test += (y_test_i_true >= CI_test_i[1])&&(y_test_i_true <= CI_test_i[2]) ? 1 : 0
        @info "CI" CI_test_i
    catch err
        append!(id_fail, i)
    end
    error_abs += abs(y_test_i_true - median_test_i)
    error_sq += (y_test_i_true - median_test_i)^2
    nlpd += log(pdf_test_i(y_test_i_true)) 
    @info "Count, id_fail" count_test, id_fail
end

┌ Info: i
│   i = 1
└ @ Main In[5]:4
┌ Info: True, median 
│   (y_test_i_true, median_test_i) = (10, 4.797367768086591)
└ @ Main In[5]:10
┌ Info: CI
│   CI_test_i = [3.492390678598973, 6.5828005340691]
└ @ Main In[5]:14
┌ Info: Count, id_fail
│   (count_test, id_fail) = (0, Any[])
└ @ Main In[5]:21
┌ Info: i
│   i = 2
└ @ Main In[5]:4
┌ Info: True, median 
│   (y_test_i_true, median_test_i) = (13, 7.964525432120251)
└ @ Main In[5]:10
┌ Info: CI
│   CI_test_i = [3.2490348787688252, 19.50891144603267]
└ @ Main In[5]:14
┌ Info: Count, id_fail
│   (count_test, id_fail) = (1, Any[])
└ @ Main In[5]:21
┌ Info: i
│   i = 3
└ @ Main In[5]:4
┌ Info: True, median 
│   (y_test_i_true, median_test_i) = (7, 10.740513011500154)
└ @ Main In[5]:10
┌ Info: CI
│   CI_test_i = [4.385873878166963, 26.27607396624067]
└ @ Main In[5]:14
┌ Info: Count, id_fail
│   (count_test, id_fail) = (2, Any[])
└ @ Main In[5]:21
┌ Info: i
│   i = 4
└ @ Main In[5]:4
┌ Info: True, median 
│   (y_test_i_true, median_test_i) =

┌ Info: CI
│   CI_test_i = [6.145197243176283, 15.6110255724764]
└ @ Main In[5]:14
┌ Info: Count, id_fail
│   (count_test, id_fail) = (26, Any[])
└ @ Main In[5]:21
┌ Info: i
│   i = 29
└ @ Main In[5]:4
┌ Info: True, median 
│   (y_test_i_true, median_test_i) = (15, 11.723171379527965)
└ @ Main In[5]:10
┌ Info: CI
│   CI_test_i = [4.916247688867088, 27.950181512127717]
└ @ Main In[5]:14
┌ Info: Count, id_fail
│   (count_test, id_fail) = (27, Any[])
└ @ Main In[5]:21
┌ Info: i
│   i = 30
└ @ Main In[5]:4
┌ Info: True, median 
│   (y_test_i_true, median_test_i) = (6, 5.598715531867994)
└ @ Main In[5]:10
┌ Info: CI
│   CI_test_i = [4.712420849913544, 6.7360676380491]
└ @ Main In[5]:14
┌ Info: Count, id_fail
│   (count_test, id_fail) = (28, Any[])
└ @ Main In[5]:21


In [6]:
count_test /= n_test - length(id_fail)
error_abs  /= n_test
error_sq   /= n_test
nlpd       /= -n_test

40.18671922407129

In [7]:
error_sq

14.098494448005281

In [8]:
error_abs

2.9691971991088915

In [9]:
nlpd

40.18671922407129

# Validation

In [None]:
function lootd(td::AbstractTrainingData, i::Int64)
        x = getPosition(td)
        Fx = getCovariates(td)
        z = getLabel(td)
        x_minus_i = x[[1:i-1;i+1:end], :]
        Fx_minus_i = Fx[[1:i-1;i+1:end], :]
        z_minus_i = z[[1:i-1;i+1:end]]
        x_i = x[i:i, :]
        Fx_i = Fx[i:i, :]
        z_i = z[i:i, :]
        return trainingData(x_minus_i, Fx_minus_i, z_minus_i), x_i, Fx_i, z_i
end