In [None]:
using Cuba, Distributions
using BenchmarkTools, Test

# Guard: the threaded integrands defined below use `Threads.@threads`, which only
# runs in parallel when Julia was started with more than one thread
# (e.g. `julia -t N` or the JULIA_NUM_THREADS environment variable).
@test Threads.nthreads()>1

In [None]:
# Problem size and integrator tolerances shared by the cells below.
M = 25         # number of independent beta random variables
atol = 1.0e-6  # absolute tolerance passed to the integrators
rtol = 1.0e-3  # relative tolerance passed to the integrators

# Scalar integrand: the pdf of the joint distribution of independent
# Beta(1, 2) variables -- its integral should always equal 1.
#
# Arguments:
#   x -- the evaluation point; one coordinate per Beta variable
#   f -- output buffer; the density at `x` is written to f[1]
#
# Returns the density value that was stored in f[1].
function int(x, f)
    # Size the product from the point itself instead of reading the non-const
    # global `M`, and use `fill` so a single Beta is constructed rather than
    # broadcasting the constructor over a freshly allocated `2.0*ones(M)`.
    joint = Product(fill(Beta(1.0, 2.0), length(x)))
    f[1] = pdf(joint, x)
    return f[1]
end

# Multithreaded, vectorised integrand: evaluates the joint Beta(1, 2) pdf at
# every column of `x`, one column per loop iteration.
#
# Arguments:
#   x -- matrix of evaluation points, one point per column
#   f -- output buffer; the density of column i is written to f[i]
#        (linear indexing -- assumes a single component, as in the calls in
#        this file which pass ncomp = 1)
#
# Returns nothing.
function int_thread_col(x, f)
    # Build the product distribution once per batch instead of once per column.
    # It is only read inside the loop, so sharing it between threads is safe.
    # Its dimension comes from `x`, not from the non-const global `M`.
    joint = Product(fill(Beta(1.0, 2.0), size(x, 1)))
    Threads.@threads for i in axes(x, 2)
        f[i] = pdf(joint, @view(x[:, i]))
    end
    return nothing
end

# Multithreaded integrand that forms the product density by hand: for each
# column of `x`, multiply the univariate Beta(1, 2) pdf over all rows.
#
# Arguments:
#   x -- matrix of evaluation points, one point per column
#   f -- output buffer; the density of column j is written to f[1, j]
#
# Returns nothing.
function int_thread_el(x, f)
    marginal = Beta(1.0, 2.0)  # loop-invariant, construct once
    Threads.@threads for j in axes(x, 2)
        # Accumulate in a local instead of read-modify-writing f[1, j] on
        # every factor; start from 1 so an empty column yields 1.
        acc = one(eltype(f))
        for i in axes(x, 1)
            # Plain scalar indexing: a zero-dimensional view of one element
            # is unnecessary for a univariate pdf.
            acc *= pdf(marginal, x[i, j])
        end
        f[1, j] = acc
    end
    return nothing
end

# Sanity check: all three integrands should integrate to (approximately) 1.
# The threaded variants are evaluated in batches of 100 points.
@show result, err = cuhre(int, M, 1, atol = atol, rtol = rtol);
@show result, err = cuhre(int_thread_col, M, 1, atol = atol, rtol = rtol, nvec = 100);
@show result, err = cuhre(int_thread_el, M, 1, atol = atol, rtol = rtol, nvec = 100);


In [None]:
# Baseline: scalar integrand, no batching.
@btime cuhre($(int), $M, 1, atol=$atol, rtol=$rtol) # slow

# Sweep the batch size (nvec) for the column-wise threaded integrand.
println("multithread")
for nv in (16, 100, 1000, 10000, 100000, 1000000, 10000000)
    @btime cuhre($int_thread_col, $M, 1, atol=$atol, rtol=$rtol, nvec=$nv)
end

# Same sweep for the element-wise threaded integrand.
println("multithread and create product in loop")
for nv in (16, 100, 1000, 10000, 100000, 1000000, 10000000)
    @btime cuhre($int_thread_el, $M, 1, atol=$atol, rtol=$rtol, nvec=$nv)
end

# conclusions
# int_thread_el seems faster than int_thread_col
# nvec seems important for performance, looks like we want high nvec

In [None]:
# let's try without index checking
#
# Same as the column-wise threaded integrand, but with bounds checks disabled
# on the per-column evaluation and store.
#
# Arguments:
#   x -- matrix of evaluation points, one point per column
#   f -- output buffer; the density of column i is written to f[i]
#        (linear indexing -- assumes a single component and at least as many
#        entries as `x` has columns, since the store is unchecked)
#
# Returns nothing.
function int_thread_col_in(x, f)
    # Build the product distribution once per batch instead of once per column.
    # It is only read inside the loop, so sharing it between threads is safe.
    # Its dimension comes from `x`, not from the non-const global `M`.
    joint = Product(fill(Beta(1.0, 2.0), size(x, 1)))
    Threads.@threads for i in axes(x, 2)
        @inbounds f[i] = pdf(joint, @view(x[:, i]))
    end
    return nothing
end

# Element-wise threaded integrand with bounds checks disabled: for each column
# of `x`, multiply the univariate Beta(1, 2) pdf over all rows.
#
# Arguments:
#   x -- matrix of evaluation points, one point per column
#   f -- output buffer; the density of column j is written to f[1, j]
#        (assumes `f` has at least as many columns as `x`, since the store
#        is unchecked)
#
# Returns nothing.
function int_thread_el_in(x, f)
    marginal = Beta(1.0, 2.0)  # loop-invariant, construct once
    Threads.@threads for j in axes(x, 2)
        # Accumulate in a local instead of read-modify-writing f[1, j] on
        # every factor; start from 1 so an empty column yields 1.
        acc = one(eltype(f))
        # Indices come from axes(x, ...), so the unchecked reads of x are in range.
        @inbounds for i in axes(x, 1)
            # Plain scalar indexing: a zero-dimensional view of one element
            # is unnecessary for a univariate pdf.
            acc *= pdf(marginal, x[i, j])
        end
        @inbounds f[1, j] = acc
    end
    return nothing
end

# Compare each threaded integrand with its @inbounds twin at one large batch size.
nvec = 10000000
for integrand in (int_thread_col, int_thread_col_in, int_thread_el, int_thread_el_in)
    @btime cuhre($integrand, $M, 1, atol=$atol, rtol=$rtol, nvec=$nvec)
end

# conclusion
# using @inbounds does not seem to make a big difference

In [None]:
# monte carlo divonne

# With the default evaluation budget the answer is wrong;
# the output suggests increasing maxevals.
@show result, err = divonne(int, M, 1, atol = atol, rtol = rtol);

In [None]:
# Better: raise the evaluation budget.
maxevals = 1174707384

# Scalar integrand first, then the two threaded integrands with batch size `nvec`.
@show result, err = divonne(int, M, 1, atol = atol, rtol = rtol, maxevals = maxevals);
@show result, err = divonne(int_thread_col, M, 1, atol = atol, rtol = rtol, nvec = nvec, maxevals = maxevals);
@show result, err = divonne(int_thread_el, M, 1, atol = atol, rtol = rtol, nvec = nvec, maxevals = maxevals);

In [None]:
# Time divonne with the raised evaluation budget.
# `maxevals` is interpolated with `$` like every other argument, so the
# benchmark does not read it as an untyped global on each evaluation.

# Baseline: scalar integrand, no batching.
@btime divonne($(int), $M, 1, atol=$atol, rtol=$rtol, maxevals=$maxevals)

# Sweep the batch size (nvec) for the column-wise threaded integrand.
println("multithread")
for nv in (16, 100, 1000, 10000, 100000, 1000000, 10000000)
    @btime divonne(
        $int_thread_col, $M, 1, atol=$atol, rtol=$rtol, nvec=$nv, maxevals=$maxevals
    )
end

# Same sweep for the element-wise threaded integrand.
println("multithread and create product in loop")
for nv in (16, 100, 1000, 10000, 100000, 1000000, 10000000)
    @btime divonne(
        $int_thread_el, $M, 1, atol=$atol, rtol=$rtol, nvec=$nv, maxevals=$maxevals
    )
end


In [None]:
# monte carlo suave

# Settings used for the suave runs in this cell and the benchmark cell after it.
nvec = 15000000
maxevals = 300000000
nmin = 2
nnew = 80000
flatness = 150

# Scalar integrand first, then the two threaded integrands with batch size `nvec`.
@show result, err = suave(int, M, 1, atol = atol, rtol = rtol, maxevals = maxevals, nnew = nnew, nmin = nmin, flatness = flatness);
@show result, err = suave(int_thread_col, M, 1, atol = atol, rtol = rtol, maxevals = maxevals, nvec = nvec, nnew = nnew, nmin = nmin, flatness = flatness);
@show result, err = suave(int_thread_el, M, 1, atol = atol, rtol = rtol, maxevals = maxevals, nvec = nvec, nnew = nnew, nmin = nmin, flatness = flatness);

In [None]:
# Baseline: scalar integrand, no batching.
@btime suave($(int), $M, 1, atol=$atol, rtol=$rtol,maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness) # fast

# Sweep the batch size (nvec) for the column-wise threaded integrand.
println("multithread")
for nv in (16, 100, 1000, 10000, 100000, 1000000, 10000000)
    @btime suave(
        $int_thread_col, $M, 1, atol=$atol, rtol=$rtol, nvec=$nv,
        maxevals=$maxevals, nnew=$nnew, nmin=$nmin, flatness=$flatness
    )
end

# Same sweep for the element-wise threaded integrand.
println("multithread and create product in loop")
for nv in (16, 100, 1000, 10000, 100000, 1000000, 10000000)
    @btime suave(
        $int_thread_el, $M, 1, atol=$atol, rtol=$rtol, nvec=$nv,
        maxevals=$maxevals, nnew=$nnew, nmin=$nmin, flatness=$flatness
    )
end

In [None]:
# ### Default values of parameters
# # Common arguments.
# const NVEC      = 1
# const RTOL      = 1e-4
# const ATOL      = 1e-12
# const FLAGS     = 0
# const SEED      = 0
# const MINEVALS  = 0
# const MAXEVALS  = 1000000
# const STATEFILE = ""
# const SPIN      = C_NULL

# # Vegas-specific arguments.
# const NSTART    = 1000
# const NINCREASE = 500
# const NBATCH    = 1000
# const GRIDNO    = 0

# # Suave-specific arguments.
# const NNEW     = 1000
# const NMIN     = 2
# const FLATNESS = 25.0

# # Divonne-specific arguments.
# const KEY1         = 47
# const KEY2         = 1
# const KEY3         = 1
# const MAXPASS      = 5
# const BORDER       = 0.0
# const MAXCHISQ     = 10.0
# const MINDEVIATION = 0.25
# const NGIVEN       = 0
# const LDXGIVEN     = 0
# const XGIVEN       = 0
# const NEXTRA       = 0
# const PEAKFINDER   = C_NULL

# # Cuhre-specific argument.
# const KEY = 0