In [34]:
using LinearAlgebra                                                             
using Test                                                                                       
using Distributions                                                             
using DataFrames                                                                          
using CSV                                                                              
using Polynomials  
using Roots
using Plots
using StatsFuns
using Pkg
Pkg.add("Sobol")

                                                                                 
include("../computation/finitedifference.jl")                                                   
include("../quadrature/quadrature.jl")                                             
include("../transforms/transforms.jl")                                             
include("../priors/priors.jl")                                                     
include("../bayesopt/incremental.jl")                                              
include("../kernels/kernel.jl")                                                    
include("../datastructs.jl")                                                       
include("../computation/buffers0.jl") #datastruct, kernel, incremental, quadrature 
include("../model0.jl") #buffers, datastructs, several auxiliary  
# include("../model_MC.jl") #buffers, datastructs, several auxiliary                   
include("../computation/tdist.jl") #model0 and buffer0  


[32m[1m  Updating[22m[39m registry at `C:\Users\Albert Huang\.julia\registries\General`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`
[?25l[2K[?25h[32m[1m Resolving[22m[39m package versions...
[32m[1m Installed[22m[39m OpenSSL_jll ─── v1.1.1+2
[32m[1m Installed[22m[39m libass_jll ──── v0.14.0+1
[32m[1m Installed[22m[39m GeometryTypes ─ v0.8.2
[32m[1m  Updating[22m[39m `C:\Users\Albert Huang\Desktop\btg\BayesTransformedGaussian\Project.toml`
 [90m [ed01d8cd][39m[92m + Sobol v1.3.0[39m
[32m[1m  Updating[22m[39m `C:\Users\Albert Huang\Desktop\btg\BayesTransformedGaussian\Manifest.toml`
 [90m [68821587][39m[93m ↑ Arpack_jll v3.5.0+2 ⇒ v3.5.0+3[39m
 [90m [4fba245c][39m[93m ↑ ArrayInterface v2.6.1 ⇒ v2.8.3[39m
 [90m [6e34b625][39m[93m ↑ Bzip2_jll v1.0.6+1 ⇒ v1.0.6+2[39m
 [90m [e66e0078][39m[93m ↑ CompilerSupportLibraries_jll v0.3.1+0 ⇒ v0.3.3+0[39m
 [90m [864edb3b][39m[93m ↑ DataStructures v0.17.1

compute_BO_derivs

In [35]:
"""
    covariate_fun(x, p)

Build the covariate (design) matrix for the input locations `x`.

# Arguments
- `x`: input locations, one row per point; `size(x, 1)` is the number of points `n`
  and `size(x, 2)` is the input dimension `d`.
- `p`: number of covariate columns requested.

# Returns
- `p == 1`: an `n × 1` matrix of ones (constant covariate).
- `p == 1 + d`: the `n × (1 + d)` matrix `[1 x]` (constant plus linear covariates).

# Throws
- `ArgumentError` for any other value of `p`.
"""
function covariate_fun(x, p)
    n, d = size(x, 1), size(x, 2)
    if p == 1
        return ones(n, 1)
    elseif p == 1 + d
        return hcat(ones(n), x)
    end
    # Report the rejected value so a mis-set `p` is easy to spot.
    throw(ArgumentError(
        "Only constant (p = 1) or linear (p = $(1 + d)) covariates are supported, " *
        "got p = $p."
    ))
end

covariate_fun (generic function with 1 method)

In [36]:
# Shared fixtures for the experiments below.

# Number of covariate columns: 1 = constant basis, 2 = constant + linear basis.
# Both assignments are kept from the original cell; the second is the value in effect.
p = 1
p = 2

# Candidate training inputs (1-d locations).
x1 = [0.0, 0.5, 0.8, 1.0, 1.6, 2.0]
x2 = [0.0, 0.5, 0.8, 1.0, 2.0]
x3 = [0.0, 0.8, 1.0, 1.6, 2.0]
x4 = [0.0, 1.0, 1.6, 2.0]

# Candidate test locations.
x01 = 0.25
x02 = 1.2
x03 = 0.5
x04 = 0.6

# Underlying 1-d functions used to generate the synthetic observations.

# Sine shifted up by 1.1.
function test_fun1(x)
    return sin(x) + 1.1
end

# Sine of 15x added to the parabola x^2 + 1.
function test_fun2(x)
    return sin(15 * x) + (x^2 + 1)
end

# Logarithmic term plus a cubic divided by exp(x).
function test_fun3(x)
    return log(x^2 + 1) + (x^3 + x + 1) / (exp(x))
end

test_fun3 (generic function with 1 method)

# Summary
**Setting parameters**: \
p: choice of covariates; p = 1 for a constant basis and p = 2 for a linear basis \
x: training data \
x0: testing data \
test_fun: underlying function

**Tests**:
1. p=1 or p=2, x=x1, x0=x01, test_fun=test_fun1 \
    narrow pdf, some quantile computations are hard ==> bracketing interval $[0,5]$ works well!
2. p=1, x=x2, x0=x02, test_fun=test_fun2 \
    pdf is very large near 0, so the 2.5% quantile estimate comes out negative; this is not a good guess, and it also indicates that the 2.5% quantile is extremely small.
3. p=1, x=x3, x0=x03, test_fun=test_fun3 \
    narrow pdf, the median is hard to compute ==> bracketing interval $[0,5]$ works well! \
    cdf(0) = 0.02898 > 0.025, so the 2.5% quantile doesn't exist
4. p=1, x=x4, x0=x04, test_fun=test_fun1 \
    two-peak pdf; the mode and some quantiles can be computed, but the two peaks make computing the CI (equal-tailed and narrowest) very hard.
    
**Conclusion**:
1. A narrow pdf is the hardest case. Even if quant0 provides a relatively good initial guess, find_zero easily wanders into the negative region, where the CDF is not defined. \
Solution: a bracketing interval works better than an initial guess in root-finding.
2. If cdf(0) > 2.5%, the 2.5% quantile doesn't exist.
3. Unusual shapes (a large pdf near 0, or a two-peak pdf) should be treated carefully.
 

In [37]:
# --- Experiment configuration ---
p = 1                 # number of covariate columns: only 1 or 1 + dim(x) is supported
x = x3                # training inputs
x0 = x03              # test location
test_fun = test_fun1  # function generating the observations

# Put the inputs in matrix form, one row per point.
x = reshape(x, :, 1)
x0 = fill(x0, 1, 1)

# Training covariates and observations; the observations are divided by their maximum.
Fx = covariate_fun(x, p)
y = vec(test_fun.(x))
ymax = maximum(y)
y = y / ymax
trainingData1 = trainingData(x, Fx, y)

# Covariates and the identically scaled true value at the test location.
Fx0 = covariate_fun(x0, p)
y0_true = vec(test_fun.(x0)) ./ ymax

println("Prediction at x0 = $x0")
println("Covariates at x0: $Fx0")
println("Should expect peak near $y0_true")

Prediction at x0 = [0.5]
Covariates at x0: [1.0]
Should expect peak near [0.752260142876725]


In [39]:
# Read the problem sizes back from the training data.
# Note: this rebinds the global `p` that was set by hand in the setup cell.
d = getDimension(trainingData1)
n = getNumPts(trainingData1)
p = getCovDimension(trainingData1)
# 1×2 ranges handed to `btg` for θ and λ
# (presumably the integration bounds of those parameters — TODO confirm in `btg`).
rangeθ = [0.25 100]
rangeλ = [0.5 5]

# Model 1: both entries of `quadtype` are "MonteCarlo".
btg1 = btg(trainingData1, rangeθ, rangeλ; quadtype = ["MonteCarlo", "MonteCarlo"])
weightTensorGrid = weight_comp(btg1)
# `solve` returns four values; the first three are later called as f(x0, Fx0, y0).
# The trailing `;` suppresses the cell output.
pdf1, cdf1, dpdf1, quantInfo = solve(btg1);

In [40]:
# Model 2: same data and ranges, with `quadtype` left at the constructor's default
# (the plots label this model "Gaussian"; presumably that is the default — TODO confirm).
btg2 = btg(trainingData1, rangeθ, rangeλ)
weightTensorGrid2 = weight_comp(btg2)
# Same four outputs as for model 1; the trailing `;` suppresses the cell output.
pdf2, cdf2, dpdf2, quantInfo2 = solve(btg2);

In [41]:
# Model 3: mixed quadrature, `quadtype = ["MonteCarlo", "Gaussian"]`
# (which entry applies to θ and which to λ is not visible here — TODO confirm in `btg`).
btg3 = btg(trainingData1, rangeθ, rangeλ; quadtype = ["MonteCarlo", "Gaussian"])
weightTensorGrid3 = weight_comp(btg3)
# Same four outputs as for model 1; the trailing `;` suppresses the cell output.
pdf3, cdf3, dpdf3, quantInfo3 = solve(btg3);

In [53]:
# Rebuild GR with an empty GRDIR, then load Plots.
ENV["GRDIR"] = ""
Pkg.build("GR")
using Plots

# Fix the test location (x0, Fx0) so each function depends on the response value only.
# These must stay anonymous functions bound to plain variables: a later cell rebinds
# the same names to the output of `pre_process`.

# Model 1 (both quadratures Monte Carlo)
pdf_fixed = y -> pdf1(x0, Fx0, y)
cdf_fixed = y -> cdf1(x0, Fx0, y)
dpdf_fixed = y -> dpdf1(x0, Fx0, y)

# Model 2 (default quadrature)
pdf_fixed2 = y -> pdf2(x0, Fx0, y)
cdf_fixed2 = y -> cdf2(x0, Fx0, y)
dpdf_fixed2 = y -> dpdf2(x0, Fx0, y)

# Model 3 (Monte Carlo + Gaussian)
pdf_fixed3 = y -> pdf3(x0, Fx0, y)
cdf_fixed3 = y -> cdf3(x0, Fx0, y)
dpdf_fixed3 = y -> dpdf3(x0, Fx0, y)

# Overlay the three CDFs on [a, b]; the last call is the cell's displayed value.
a, b = 1e-3, 1.0
plt(cdf_fixed, a, b, 200, label = "MC")
plt!(cdf_fixed2, a, b, 200, label = "Gaussian")
plt!(cdf_fixed3, a, b, 200, label = "MC+Gaussian")

[32m[1m  Building[22m[39m GR → `C:\Users\Albert Huang\.julia\packages\GR\yMV3y\deps\build.log`


GKS: svgplugin.dll: can't load library, error 126 (0x7e)


SystemError: SystemError: opening file "C:\\Users\\ALBERT~1\\AppData\\Local\\Temp\\jl_91E7.tmp.svg": No such file or directory

In [29]:
# Overlay the three PDFs on [a, b], mirroring the CDF plot.
# Needs `pdf_fixed*`, `a`, `b`, `x0` and `Fx0` from the earlier cells; run those first
# after a kernel restart, otherwise the closures fail with UndefVarError.
plt(pdf_fixed, a, b, 200, label = "MC")
plt!(pdf_fixed2, a, b, 200, label = "Gaussian")
plt!(pdf_fixed3, a, b, 200, label = "MC+Gaussian")

UndefVarError: UndefVarError: Fx0 not defined

# Quantile computation

In [11]:
include("test_quantile.jl")
include("../statistics.jl")

ErrorException: could not open file C:\Users\Albert Huang\Desktop\btg\BayesTransformedGaussian\src\test\test_quantile.jl

In [12]:
# Preprocess the output of solve(btg) at the fixed test location (x0, Fx0).
# Each call rebinds the `*_fixed` names and also returns an initial quantile guess
# (`quant0*`) and an estimated support of the pdf.
#
# The Monte Carlo model's functions are `pdf1`/`cdf1`/`dpdf1` (bound by `solve(btg1)`).
# The bare names `pdf`/`cdf` would resolve to the Distributions.jl generic functions,
# and `dpdf` is not defined in the visible cells.
pdf_fixed, cdf_fixed, dpdf_fixed, quant0, support = pre_process(x0, Fx0, pdf1, cdf1, dpdf1, quantInfo[1]);
println("MC: Estimated support of pdf: $support")
pdf_fixed2, cdf_fixed2, dpdf_fixed2, quant02, support2 = pre_process(x0, Fx0, pdf2, cdf2, dpdf2, quantInfo2[1]);
println("Gaussian: Estimated support of pdf: $support2")
pdf_fixed3, cdf_fixed3, dpdf_fixed3, quant03, support3 = pre_process(x0, Fx0, pdf3, cdf3, dpdf3, quantInfo3[1]);
println("MonteCarlo + Gaussian: Estimated support of pdf: $support3")

UndefVarError: UndefVarError: quantInfo not defined

In [13]:
# Summarize the MC model's predictive distribution from its pre-processed functions,
# initial quantile guess and estimated support.
# NOTE(review): the meaning of `px = .8` is defined inside `summary_comp` — confirm there.
DistributionInfo = summary_comp(pdf_fixed, cdf_fixed, dpdf_fixed, quant0, support; px = .8)

UndefVarError: UndefVarError: summary_comp not defined

In [14]:
# Same summary for model 2 (the one labelled "Gaussian" in the plots and printouts).
# NOTE(review): the meaning of `px = .8` is defined inside `summary_comp` — confirm there.
DistributionInfo2 = summary_comp(pdf_fixed2, cdf_fixed2, dpdf_fixed2, quant02, support2; px = .8)

UndefVarError: UndefVarError: summary_comp not defined

In [15]:
# Same summary for model 3 (Monte Carlo + Gaussian quadrature).
# NOTE(review): the meaning of `px = .8` is defined inside `summary_comp` — confirm there.
DistributionInfo3 = summary_comp(pdf_fixed3, cdf_fixed3, dpdf_fixed3, quant03, support3; px = .8)

UndefVarError: UndefVarError: summary_comp not defined