In [1]:
using Dates
using ArgParse
using Printf
using Random
using GaussianProcesses
using Cubature
include("../../btg.jl")

print (generic function with 7 methods)

In [2]:
#########################
# initialize training set
#########################
# The training-set size, the covariate columns (posc) and the shuffle seed belong to
# data preparation rather than to the BTG model, so they are set here instead of being
# passed through the BTG Options.

# Abalone data read without a header row. Column 1 is not used; columns 2-8 are the
# features and column 9 is the regression target.
df = DataFrame(CSV.File("../../datasets/abalone.csv", header = 0))
# `Matrix(df)` replaces `convert(Matrix, df)`, which newer DataFrames.jl releases removed.
# columns: length, diameter, height, whole weight, shucked weight, viscera weight,
# shell weight
data = Matrix(df[:, 2:8])
target = convert(Array, df[:, 9]) # age
feature_size = 7

# Shuffle rows with a fixed seed so the train/test split is reproducible.
randseed = 1234
rng = MersenneTwister(randseed)
ind_shuffle = randperm(rng, size(data, 1))
data = data[ind_shuffle, :]
target = target[ind_shuffle]

# The first ntrain_size shuffled rows form the training set. The size is stated once
# and the index range is derived from it, so the two can no longer disagree
# (previously ntrain_size said 1000 while the range was hard-coded to 1:100).
ntrain_size = 100
id_train = 1:ntrain_size
n_train = length(id_train)

# Feature matrix: all feature columns of the training rows.
posx = 1:feature_size
x = data[id_train, posx]

# Covariate matrix: the first three feature columns of the training rows.
# posc is bound directly to a range instead of first to an Int and then to a range.
posc = 1:3
Fx = data[id_train, posc]

y = float(target[id_train])
trainingData0 = trainingData(x, Fx, y)
d = getDimension(trainingData0)
n = getNumPts(trainingData0)
p = getCovDimension(trainingData0)
# Keep this assignment last: its value is what the cell displays.
ymax = trainingData0.ymax

22.0

In [3]:
# Ranges for the λ and θ parameters, each stored as a 1×2 Float64 row matrix.
rangeλ = Float64[-1 2]
rangeθ = Float64[100 2000]
# Unused alternative kept for reference (presumably one θ range per feature row):
# rangeθm = repeat(rangeθ, 7, 1)

1×2 Array{Float64,2}:
 100.0  2000.0

In [4]:
# Build the BTG options from the θ and λ ranges and the training data.
# NOTE(review): the meaning of the trailing `1` is not visible in this file — confirm
# against the Options constructor in btg.jl.
options = Options(rangeθ, rangeλ, trainingData0, 1)

Options("BoxCox", "Gaussian", Dict("λ" => "Gaussian","θ" => "Gaussian"), Dict("Gaussian" => 12,"MonteCarlo" => 400), Dict("λ" => [-1.0 2.0],"θ" => [100.0 2000.0]), Dict{String,priorType}("λ" => Uniform(Real[-1.0 2.0], 1, Real[3.0]),"θ" => inverseUniform([100.0 2000.0], 1, Real[1900.0])), 1, 0.95)

In [5]:
# Call btg on the training data and options, timing the call; `;` hides the value.
@time btg0 = btg(trainingData0, options);
# Print the options stored on the resulting object.
print(btg0.options)

  7.043697 seconds (22.30 M allocations: 1.087 GiB, 10.72% gc time)


   transform type:       BoxCox
   Kernel type:          Gaussian
   quadrature type:      Dict("λ" => "Gaussian","θ" => "Gaussian")
   quadrature size:      Dict("Gaussian" => 12,"MonteCarlo" => 400)
   Confidence level:     0.95
   Parameter range:      
                         λ: [-1.0 2.0]
                         θ: [100.0 2000.0]
   Parameter prior:      
                         λ: Uniform(Real[-1.0 2.0], 1, Real[3.0])
                         θ: inverseUniform([100.0 2000.0], 1, Real[1900.0])


In [6]:
##################################################
# process test data
##################################################
# The test rows are the ntest rows of the shuffled data that follow row ntrain_size.
ntest = 5
ntrain_size = 800
id_test = ntrain_size .+ (1:ntest)
n_test = length(id_test)

# Same covariate columns as used for training: the first three features.
posc = 1:3
x_test = data[id_test, posx]   # n_test × length(posx) feature matrix
Fx_test = data[id_test, posc]  # n_test × length(posc) covariate matrix

# True targets as an ntest × 1 Float64 matrix; kept last so the cell displays it.
y_test_true = reshape(float(target[id_test]), ntest, 1)

5×1 Array{Float64,2}:
 10.0
  7.0
 11.0
  8.0
 21.0

In [7]:
# Run btgPredict on the test features and covariates with btg0, timing the call.
# y_test_true is passed as y_true (presumably so the result can carry error metrics — confirm in btgPredict).
@time predict = btgPredict(x_test, Fx_test, btg0; y_true = y_test_true);

12×12 Array{Float64,2}:
 3.54944e-16  4.34664e-15  8.41411e-14  …  5.37718e-19  1.84734e-20
 9.57546e-8   1.08355e-6   1.66977e-5      4.85962e-15  8.55911e-17
 0.000505632  0.005001     0.0549122       1.59434e-16  1.43439e-18
 0.000260594  0.00216993   0.0166898       2.38741e-20  1.47063e-22
 4.88929e-6   3.49436e-5   0.000202844     5.10515e-24  2.63774e-26
 6.55111e-8   4.19123e-7   2.00731e-6   …  6.22002e-27  2.96262e-29
 1.4916e-9    8.85614e-9   3.74152e-8      3.74833e-29  1.71554e-31
 6.80989e-11  3.84605e-10  1.49669e-9      7.86005e-31  3.52226e-33
 5.98643e-12  3.26878e-11  1.20437e-10     4.27542e-32  1.89293e-34
 9.08628e-13  4.85105e-12  1.72396e-11     4.79554e-33  2.10792e-35
 2.05277e-13  1.0806e-12   3.75473e-12  …  9.01802e-34  3.94707e-36
 5.05663e-14  2.64235e-13  9.07451e-13     2.0221e-34   8.83142e-37


 weightsTensorGrid
 12.410278 seconds (40.68 M allocations: 1.907 GiB, 5.42% gc time)


In [8]:
# Show the test data stored on the prediction result.
predict.testingdata

testingData([0.675 0.51 … 0.3175 0.3965; 0.34 0.25 … 0.0405 0.05; … ; 0.46 0.35 … 0.077 0.123; 0.68 0.55 … 0.4075 0.585], [0.675 0.51 0.195; 0.34 0.25 0.075; … ; 0.46 0.35 0.1; 0.68 0.55 0.2], [10.0; 7.0; … ; 8.0; 21.0], 7, 3, 5)

In [9]:
# Show the `median` field of the prediction result (one value per test point).
predict.median

5-element Array{Real,1}:
  9.121171329404955
  6.816050410640659
  9.451162968569534
  7.253598463820227
 10.211018903131869

In [10]:
# Show the interval stored for each test point (a two-element array per point).
# NOTE(review): the field name is spelled `credible_intervel` (sic); it must match the
# result type defined in btg.jl, so it cannot be corrected from this notebook alone.
predict.credible_intervel

5-element Array{Array,1}:
 Real[4.8403041369058855, 18.96200130204881]
 Real[5.795380873543383, 8.105537406676829]
 Real[6.944307586036174, 13.094156687117545]
 Real[4.707373191995036, 11.657694503618272]
 Real[4.48646918103099, 28.091019083353377]

In [11]:
# Show the per-point absolute error; the values shown equal |median - y_test_true|.
predict.absolute_error

5-element Array{Real,1}:
  0.8788286705950448
  0.18394958935934103
  1.548837031430466
  0.7464015361797731
 10.788981096868131

In [12]:
# Show the `negative_log_pred_density` field of the prediction result (one value per test point).
predict.negative_log_pred_density

5-element Array{Real,1}:
 -0.8938289428414969
 -2.6529760490608183
 -1.1364793759317373
 -1.4632551864651913
  1.3694100840637113

In [13]:
# Show the timing breakdown recorded by the prediction call (a Dict of named timings).
predict.time_cost

Dict{String,Float64} with 5 entries:
  "time_median"     => 1.10179
  "time_total"      => 8.53491
  "time_eval"       => 0.0164432
  "time_preprocess" => 5.32105
  "time_CI"         => 1.90396

In [14]:
# Show the mean absolute error; the value shown is the mean of predict.absolute_error.
predict.mean_abs_err

2.829399584886551

In [15]:
# Show the mean squared error; the value shown is the mean of predict.absolute_error squared.
predict.mean_sq_err

24.03286035908101

In [16]:
# Short alias for the prediction result; the trailing `;` suppresses the cell output.
P = predict;

In [17]:
# Take the first entry of P.pdf; the cell output shows it is a callable (`pdf_fixed`).
# Presumably this is the predictive pdf for the first test point — confirm in btgPredict.
f1 = P.pdf[1]

(::var"#pdf_fixed#298"{Array{Float64,2},Array{Float64,2},var"#140#159"{btg,Array{Float64,2},Array{Float64,2},var"#evalgrid_pdf!#149"{Array{Function,2},SubArray{Float64,2,Array{Float64,2},Tuple{UnitRange{Int64},UnitRange{Int64}},false},var"#evalgrid!#147"},var"#checkInput#156"}}) (generic function with 1 method)

In [18]:
# 100 evenly spaced grid points from 0.01 to 1.3 (both ends included).
xgrid = range(0.01; stop = 1.3, length = 100)

0.01:0.013030303030303031:1.3

In [None]:
# NOTE(review): unfinished, unexecuted cell — plot is called with no arguments, so
# nothing from f1 or xgrid is drawn; presumably f1 was meant to be plotted over xgrid.
# Also confirm a plotting package is loaded: none appears in this file's `using` list.
plot()