In [1]:
import Pkg
Pkg.activate("joint_timing")
Pkg.instantiate()

using Cuba, Distributions
using BenchmarkTools, Test, CUDA
using FLoops, FoldsCUDA
using SpecialFunctions

[32m[1m  Activating[22m[39m environment at `~/SageMaker/networks_hub/SAN/code/network_simulations/joint_timing/Project.toml`


In [15]:
# Problem size and integration tolerances used by the `suave` calls in later cells.
M=25 # number of independent beta random variables
atol=1e-6 # absolute tolerance, forwarded as the `atol` keyword of `suave`
rtol=1e-3 # relative tolerance, forwarded as the `rtol` keyword of `suave`

# integrate the pdf of the joint distribution -- should always equal 1
"""
    int(x, f)

Integrand callback: evaluate at the point `x` the joint density of `length(x)` independent
`Beta(1, 2)` variables and store it in `f[1]`.

The dimension is taken from `x` itself instead of the global `M`, so the integrand stays
consistent with the dimension handed to the integrator even when `M` is rebound in another
cell.

# Arguments
- `x`: vector holding one evaluation point.
- `f`: output buffer; only `f[1]` is written.

# Returns
The density value that was stored in `f[1]`.
"""
function int(x, f)
    joint = Product(fill(Beta(1.0, 2.0), length(x)))
    density = pdf(joint, x)
    f[1] = density
    return density
end

# multithread
"""
    int_thread_col(x, f)

Vectorised integrand callback: for every column `x[:, i]` store in `f[i]` the joint density
of `size(x, 1)` independent `Beta(1, 2)` variables. Columns are processed in parallel with
`Threads.@threads`.

# Arguments
- `x`: matrix whose columns are the evaluation points.
- `f`: output buffer, written by linear index, one entry per column of `x`.

# Returns
`nothing`; results are delivered through `f`.
"""
function int_thread_col(x, f)
    # The distribution is the same for every column, so build it once instead of
    # re-allocating it inside the threaded loop. Its size comes from `x` rather than
    # from the global `M`, which other cells rebind.
    joint = Product(fill(Beta(1.0, 2.0), size(x, 1)))
    Threads.@threads for i in axes(x, 2)
        f[i] = pdf(joint, @view(x[:, i]))
    end
    return nothing
end

# multithread and loop to create product distribution
"""
    int_thread_el(x, f)

Vectorised integrand callback that builds the joint density element by element: for every
column `j` of `x`, `f[1, j]` receives the product over `i` of the `Beta(1, 2)` density at
`x[i, j]`. Columns are processed in parallel with `Threads.@threads`.

# Arguments
- `x`: matrix whose columns are the evaluation points.
- `f`: output buffer; `f[1, j]` is written for every column `j` of `x`.

# Returns
`nothing`; results are delivered through `f`.
"""
function int_thread_el(x, f)
    marginal = Beta(1.0, 2.0)
    Threads.@threads for j in axes(x, 2)
        # Accumulate in a local and store once per column instead of repeatedly
        # reading and writing `f[1, j]`.
        acc = 1.0
        for i in axes(x, 1)
            # Scalar indexing: `pdf` of a univariate distribution expects a number,
            # not a zero-dimensional view.
            acc *= pdf(marginal, x[i, j])
        end
        f[1, j] = acc
    end
    return nothing
end

int_thread_el (generic function with 1 method)

In [23]:
"""
    pdf_beta(x::Array, alpha, beta)

Return the element-wise Beta(`alpha`, `beta`) density of `x`:
`x^(alpha - 1) * (1 - x)^(beta - 1) / B(alpha, beta)`.

The normalising constant is computed from the supplied `alpha` and `beta`; it was
previously fixed at `B(1, 2)`, which is only correct for that one parameter pair.

# Arguments
- `x`: array of evaluation points.
- `alpha`, `beta`: shape parameters (both of the same type `T`).

# Returns
An array of densities with the shape produced by broadcasting `x`, `alpha` and `beta`.
"""
function pdf_beta(x::Array, alpha::T, beta::T) where T
    # The argument `beta` shadows the special function of the same name, so the
    # function must be reached through its module.
    norm_const = SpecialFunctions.beta.(alpha, beta)
    return @. x^(alpha - 1.0) * (1.0 - x)^(beta - 1.0) / norm_const
end

"""
    pdf_beta(f, x, alpha, beta)

Return the Beta(`alpha`, `beta`) density evaluated at `x` (scalar or array, broadcast
element-wise): `x^(alpha - 1) * (1 - x)^(beta - 1) / B(alpha, beta)`.

The normalising constant is computed from the supplied `alpha` and `beta`; it was
previously fixed at `B(1, 2)`.

# Arguments
- `f`: accepted for signature compatibility only. It is never modified — the original
  body merely rebound the local name — so callers must use the return value.
- `x`: evaluation point(s).
- `alpha`, `beta`: scalar shape parameters.

# Returns
The density value(s), with the same shape as `x`.
"""
function pdf_beta(f, x, alpha, beta)
    # The argument `beta` shadows the special function of the same name, so the
    # function must be reached through its module.
    norm_const = SpecialFunctions.beta(alpha, beta)
    return x .^ (alpha - 1.0) .* (1.0 .- x) .^ (beta - 1.0) ./ norm_const
end

# Spot checks with alpha = 1, beta = 2: the formula reduces to (1 - x) / B(1, 2) = 2(1 - x),
# so x = 0.2 and x = 0.3 should display 1.6 and 1.4.
# NOTE(review): the 3-argument call with a scalar `x` matches no method defined in this
# cell (the 3-argument method requires `x::Array`) — presumably it relies on a definition
# left over from earlier in the session; confirm.
display(pdf_beta(0.2,1.0,2.0))
display(pdf_beta(0.0,0.2,1.0,2.0))

display(pdf_beta([0.2,0.3],1.0,2.0))
display(pdf_beta([0.0,0.0],[0.2,0.3],1.0,2.0))


1.6

1.6

2-element Vector{Float64}:
 1.6
 1.4

2-element Vector{Float64}:
 1.6
 1.4

In [29]:
"""
    pdf_beta(x, a, b, dim)

Evaluate the Beta(`a`, `b`) density at every element of `x` and multiply the values
together along dimension(s) `dim`, giving the joint density of independent coordinates.

The normalising constant is `gamma(a) * gamma(b) / gamma(a + b)`.
"""
function pdf_beta(x, a, b, dim)
    norm_const = gamma(a) * gamma(b) / gamma(a + b)
    # Float32 literals are kept as written in the exponents and the `1 - x` term.
    densities = @. x^(a - 1.0f0) * (1.0f0 - x)^(b - 1.0f0) / norm_const
    return prod(densities; dims=dim)
end


In [35]:
# Reference value from Distributions.jl: joint density of two independent Beta(1, 2)
# variables at (0.2, 0.3), for comparison with the hand-written `pdf_beta` results.
pdf(Product(Beta.(1.0,2.0*ones(2))),[0.2,0.3])


2.2399999999999998

In [30]:

# Suave settings, forwarded below as keyword arguments of the same name
# (see the Cuba documentation for their exact meaning).
nvec=15000000 # NOTE(review): assigned here but not passed to either `suave` call in this cell
maxevals=300000000
nmin=2
nnew=80000
flatness=150

# Integrate the scalar integrand `int` over M dimensions with 1 component.
@show result, err = suave(int, M, 1, atol=atol, rtol=rtol,maxevals=maxevals,nnew=nnew,nmin=nmin,flatness=flatness); 
# NOTE(review): `int2` is not defined in any visible cell — presumably it comes from
# elsewhere in the session; this is the call that raises the `convert` MethodError
# recorded in the cell output.
@show result, err = suave(int2, M, 1, atol=atol, rtol=rtol,maxevals=maxevals,nnew=nnew,nmin=nmin,flatness=flatness); 


(result, err) = suave(int, M, 1, atol = atol, rtol = rtol, maxevals = maxevals, nnew = nnew, nmin = nmin, flatness = flatness) = Component:
 1: 0.9964268414111838 ± 0.0009925145844409677 (prob.: 1.0)
Integrand evaluations: 5360000
Number of subregions:  67
Note: The desired accuracy was reached


LoadError: MethodError: [0mCannot `convert` an object of type [92mVector{Float64}[39m[0m to an object of type [91mFloat64[39m
[0mClosest candidates are:
[0m  convert(::Type{T}, [91m::Union{InitialValues.SpecificInitialValue{typeof(*)}, InitialValues.SpecificInitialValue{typeof(Base.mul_prod)}}[39m) where T<:Union{AbstractString, Number} at /home/ec2-user/.julia/packages/InitialValues/EPz1F/src/InitialValues.jl:257
[0m  convert(::Type{T}, [91m::LLVM.GenericValue[39m, [91m::LLVM.LLVMType[39m) where T<:AbstractFloat at /home/ec2-user/.julia/packages/LLVM/7Q46C/src/execution.jl:39
[0m  convert(::Type{T}, [91m::LLVM.ConstantFP[39m) where T<:AbstractFloat at /home/ec2-user/.julia/packages/LLVM/7Q46C/src/core/value/constant.jl:98
[0m  ...

In [2]:
# multithread
"""
    int_thread_col(x, f)

Vectorised integrand callback: for every column `x[:, i]` store in `f[i]` the joint density
of `size(x, 1)` independent `Beta(1, 2)` variables. Columns are processed in parallel with
`Threads.@threads`.

# Arguments
- `x`: matrix whose columns are the evaluation points.
- `f`: output buffer, written by linear index, one entry per column of `x`.

# Returns
`nothing`; results are delivered through `f`.
"""
function int_thread_col(x, f)
    # The distribution is the same for every column, so build it once instead of
    # re-allocating it inside the threaded loop. Its size comes from `x` rather than
    # from the global `M`, which other cells rebind.
    joint = Product(fill(Beta(1.0, 2.0), size(x, 1)))
    Threads.@threads for i in axes(x, 2)
        f[i] = pdf(joint, @view(x[:, i]))
    end
    return nothing
end

# multithread and loop to create product distribution
"""
    int_thread_el(x, f)

Vectorised integrand callback that builds the joint density element by element: for every
column `j` of `x`, `f[1, j]` receives the product over `i` of the `Beta(1, 2)` density at
`x[i, j]`. Columns are processed in parallel with `Threads.@threads`.

# Arguments
- `x`: matrix whose columns are the evaluation points.
- `f`: output buffer; `f[1, j]` is written for every column `j` of `x`.

# Returns
`nothing`; results are delivered through `f`.
"""
function int_thread_el(x, f)
    marginal = Beta(1.0, 2.0)
    Threads.@threads for j in axes(x, 2)
        # Accumulate in a local and store once per column instead of repeatedly
        # reading and writing `f[1, j]`.
        acc = 1.0
        for i in axes(x, 1)
            # Scalar indexing: `pdf` of a univariate distribution expects a number,
            # not a zero-dimensional view.
            acc *= pdf(marginal, x[i, j])
        end
        f[1, j] = acc
    end
    return nothing
end

In [2]:
# monte carlo suave
# Runs the same M-dimensional, 1-component integral with three integrands:
# the scalar `int` and the two threaded variants.

# Suave settings, forwarded below as keyword arguments of the same name
# (see the Cuba documentation for their exact meaning).
nvec=15000000
maxevals=300000000
nmin=2
nnew=80000
flatness=150

# Scalar integrand: `nvec` is not passed to this call.
@show result, err = suave(int, M, 1, atol=atol, rtol=rtol,maxevals=maxevals,nnew=nnew,nmin=nmin,flatness=flatness); 
# Threaded integrands: `nvec` is passed; both callbacks iterate over `size(x, 2)`, i.e. they
# expect `x` as a matrix of points — presumably one point per column, up to `nvec` columns.
@show result, err = suave(int_thread_col, M, 1, atol=atol, rtol=rtol,maxevals=maxevals, nvec=nvec,nnew=nnew,nmin=nmin,flatness=flatness); 
@show result, err = suave(int_thread_el, M, 1, atol=atol, rtol=rtol,maxevals=maxevals, nvec=nvec,nnew=nnew,nmin=nmin,flatness=flatness);

In [2]:
# User Inputs
# NOTE(review): this rebinds the globals `M`, `atol`, `rtol`, `nvec` and `maxevals` that
# other cells also set — results depend on cell execution order.
M= 5 # number of independent uniform random variables
atol=1e-10
rtol=1e-10
nvec=100000
maxevals=10000

# Initializing Matrices
# NOTE(review): `ones_mat` and `result` are Float32 while the sample arrays are Float64 —
# confirm the mixed precision is intended.
ones_mat = CuArray(ones(Float32, M)) # length-M vector of ones as a CuArray
result = CUDA.ones(Float32, (nvec,1)) # nvec×1 CuArray of ones; rebinds the `result` name used by the `suave` cells
x_cpu = rand(Float64, (M, nvec)) # M×nvec host matrix of `rand` samples
x = CuArray(x_cpu) # CuArray copy of `x_cpu`
x_1d_cpu = rand(Float64, M) # single length-M sample vector on the host
x_1d_gpu = CuArray(x_1d_cpu) # CuArray copy of `x_1d_cpu`

[32m[1m Downloading[22m[39m artifact: CUDA110
[32m[1m Downloading[22m[39m artifact: CUDNN_CUDA110
[32m[1m Downloading[22m[39m artifact: CUTENSOR_CUDA110
│ Some functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
└ @ CUDA /home/ec2-user/.julia/packages/CUDA/k52QH/src/state.jl:224


5-element CuArray{Float64, 1}:
 0.09862817519574185
 0.37211988789676176
 0.9038049126217622
 0.2645842797714655
 0.934906268138505

In [3]:
# Benchmark the scalar integrand `int`; every global is `$`-interpolated into `@btime`.
# NOTE(review): `nnew`, `nmin` and `flatness` are not defined in the preceding set-up
# cell — presumably they come from an earlier cell in the session; confirm.
@btime suave($(int), $M, 1, atol=$atol, rtol=$rtol,maxevals=$maxevals, nnew=$nnew,nmin=$nmin,flatness=$flatness) # fast

CuArray{Float32, 1}

In [7]:
# Check the array type obtained by wrapping the Float64 host matrix `x_cpu` in a CuArray.
typeof( CuArray(x_cpu))

CuArray{Float64, 2}

In [2]:
# Run the CUDA package's own test suite — presumably to diagnose the GPU set-up, given
# the "Some functionality might not work" warnings in the output.
Pkg.test("CUDA")

[32m[1m     Testing[22m[39m CUDA
[32m[1m      Status[22m[39m `/tmp/jl_tUYAd6/Project.toml`
 [90m [79e6a3ab] [39m[37mAdapt v3.3.0[39m
 [90m [ab4f0b2a] [39m[37mBFloat16s v0.1.0[39m
 [90m [052768ef] [39m[37mCUDA v3.1.0[39m
 [90m [864edb3b] [39m[37mDataStructures v0.18.9[39m
 [90m [7a1cc6ca] [39m[37mFFTW v1.4.1[39m
 [90m [1a297f60] [39m[37mFillArrays v0.11.7[39m
 [90m [0c68f7d7] [39m[37mGPUArrays v6.2.2[39m
 [90m [a98d9a8b] [39m[37mInterpolations v0.13.2[39m
 [90m [872c559c] [39m[37mNNlib v0.7.19[39m
 [90m [ade2ca70] [39m[37mDates `@stdlib/Dates`[39m
 [90m [8ba89e20] [39m[37mDistributed `@stdlib/Distributed`[39m
 [90m [37e2e46d] [39m[37mLinearAlgebra `@stdlib/LinearAlgebra`[39m
 [90m [de0858da] [39m[37mPrintf `@stdlib/Printf`[39m
 [90m [3fa0cd96] [39m[37mREPL `@stdlib/REPL`[39m
 [90m [9a3f8284] [39m[37mRandom `@stdlib/Random`[39m
 [90m [2f01184e] [39m[37mSparseArrays `@stdlib/SparseArrays`[39m
 [90m [10745b16] [39m[

[0m                                          | [37m         | ---------------- GPU ---------------- | ---------------- CPU ---------------- |[39m
[37mTest[39m[37m                             (Worker) | [39m[37mTime (s) | GC (s) | GC % | Alloc (MB) | RSS (MB) | GC (s) | GC % | Alloc (MB) | RSS (MB) |[39m


[33m[1m│ [22m[39mSome functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
[33m[1m└ [22m[39m[90m@ CUDA ~/.julia/packages/CUDA/k52QH/src/state.jl:224[39m
[33m[1m│ [22m[39mSome functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
[33m[1m└ [22m[39m[90m@ CUDA ~/.julia/packages/CUDA/k52QH/src/state.jl:224[39m
[33m[1m│ [22m[39mSome functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
[33m[1m└ [22m[39m[90m@ CUDA ~/.julia/packages/CUDA/k52QH/src/state.jl:224[39m
[33m[1m│ [22m[39mSome functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
[33m[1m└ [22m[39m[90m@ CUDA ~/.julia/packages/CUDA/k52QH/src/state.jl:224[39m
[33m[1m│ [22m[39mSome functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
[33m[1m└ [22m[39m[90m@ CUDA ~/.jul

[37mapiutils[39m[37m                              (3) | [39m[37m    2.31 | [39m[37m  0.00 | [39m[37m 0.0 | [39m[37m      0.00 | [39m[37m   56.25 | [39m[37m  0.00 | [39m[37m 0.0 | [39m[37m      5.53 | [39m[37m  838.73 |[39m
[37mnvtx[39m[37m                                 (16) | [39m[37m    3.08 | [39m[37m  0.00 | [39m[37m 0.0 | [39m[37m      0.00 | [39m[37m   56.25 | [39m[37m  0.09 | [39m[37m 2.9 | [39m[37m     31.42 | [39m[37m  838.73 |[39m
[37mnvml[39m[37m                                 (15) | [39m[37m    6.13 | [39m[37m  0.00 | [39m[37m 0.0 | [39m[37m      0.00 | [39m[37m   56.25 | [39m[37m  0.11 | [39m[37m 1.8 | [39m[37m     50.11 | [39m[37m  838.73 |[39m
[37mcurand[39m[37m                                (9) | [39m[37m    5.89 | [39m[37m  0.00 | [39m[37m 0.0 | [39m[37m      0.00 | [39m[37m   60.25 | [39m[37m  0.18 | [39m[37m 3.1 | [39m[37m     48.56 | [39m[37m  838.73 |[39m
[37mpointer[39m[3

[33m[1m└ [22m[39m[90m@ CUDA.CUDNN ~/.julia/packages/CUDA/k52QH/lib/cudnn/dropout.jl:40[39m


[37mcudnn/activation[39m[37m                     (14) | [39m[37m   20.18 | [39m[37m  0.03 | [39m[37m 0.1 | [39m[37m      0.00 | [39m[37m  109.25 | [39m[37m  0.71 | [39m[37m 3.5 | [39m[37m    248.54 | [39m[37m  956.41 |[39m
[37mcodegen[39m[37m                               (6) | [39m[37m  254.56 | [39m[37m  5.74 | [39m[37m 2.3 | [39m[37m      0.00 | [39m[37m   97.12 | [39m[37m  6.05 | [39m[37m 2.4 | [39m[37m   2547.68 | [39m[37m  838.73 |[39m
[37mcudnn/dropout[39m[37m                         (2) | [39m[37m   47.67 | [39m[37m  0.00 | [39m[37m 0.0 | [39m[37m      0.63 | [39m[37m  115.75 | [39m[37m  1.30 | [39m[37m 2.7 | [39m[37m    516.37 | [39m[37m  969.43 |[39m


[33m[1m│ [22m[39mSome functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
[33m[1m└ [22m[39m[90m@ CUDA ~/.julia/packages/CUDA/k52QH/src/state.jl:224[39m


[37mrandom[39m[37m                               (16) | [39m[37m  277.55 | [39m[37m  5.32 | [39m[37m 1.9 | [39m[37m      0.02 | [39m[37m   63.75 | [39m[37m  7.39 | [39m[37m 2.7 | [39m[37m   2892.02 | [39m[37m  838.73 |[39m
[37mcudnn/optensor[39m[37m                        (2) | [39m[37m   18.73 | [39m[37m  0.05 | [39m[37m 0.3 | [39m[37m      0.00 | [39m[37m  109.25 | [39m[37m  0.72 | [39m[37m 3.8 | [39m[37m    241.95 | [39m[37m  978.89 |[39m
[37mcudnn/reduce[39m[37m                          (2) | [39m[37m   28.25 | [39m[37m  0.02 | [39m[37m 0.1 | [39m[37m      0.02 | [39m[37m  109.25 | [39m[37m  1.12 | [39m[37m 4.0 | [39m[37m    371.64 | [39m[37m  985.86 |[39m
[37mcudnn/inplace[39m[37m                        (18) | [39m[37m   36.06 | [39m[37m  5.44 | [39m[37m15.1 | [39m[37m      0.01 | [39m[37m  111.25 | [39m[37m  0.92 | [39m[37m 2.5 | [39m[37m    392.57 | [39m[37m  871.72 |[39m
[37mcudnn/softmax[

[33m[1m│ [22m[39mSome functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
[33m[1m└ [22m[39m[90m@ CUDA ~/.julia/packages/CUDA/k52QH/src/state.jl:224[39m


[37mgpuarrays/math[39m[37m                        (5) | [39m[37m   53.04 | [39m[37m  0.02 | [39m[37m 0.0 | [39m[37m      0.00 | [39m[37m   61.75 | [39m[37m  1.78 | [39m[37m 3.4 | [39m[37m    695.13 | [39m[37m  870.58 |[39m
[37mcusolver/sparse[39m[37m                      (14) | [39m[37m  215.37 | [39m[37m  0.01 | [39m[37m 0.0 | [39m[37m      0.19 | [39m[37m  202.75 | [39m[37m  6.25 | [39m[37m 2.9 | [39m[37m   2323.41 | [39m[37m 1390.13 |[39m
      From worker 11:	[33m[1m│ [22m[39mSome functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
      From worker 11:	[33m[1m└ [22m[39m[90m@ CUDA ~/.julia/packages/CUDA/k52QH/src/state.jl:224[39m
[37mgpuarrays/input output[39m[37m               (14) | [39m[37m   29.56 | [39m[37m  0.02 | [39m[37m 0.1 | [39m[37m      0.00 | [39m[37m   57.25 | [39m[37m  2.45 | [39m[37m 8.3 | [39m[37m    249.41 | [39m[37m 1390.13 |[39m
[37mcusolve

[33m[1m│ [22m[39mSome functionality might not work. For a fully-supported set-up, please use an older version of CUDA.jl
[33m[1m└ [22m[39m[90m@ CUDA ~/.julia/packages/CUDA/k52QH/src/state.jl:224[39m


[37mexamples[39m[37m                             (11) | [39m[37m 1088.26 | [39m[37m  0.00 | [39m[37m 0.0 | [39m[37m      0.00 | [39m[37m   56.25 | [39m[37m  0.09 | [39m[37m 0.0 | [39m[37m     44.51 | [39m[37m  838.73 |[39m
[37mgpuarrays/random[39m[37m                      (8) | [39m[37m  300.52 | [39m[37m  0.03 | [39m[37m 0.0 | [39m[37m      0.03 | [39m[37m   63.75 | [39m[37m  8.42 | [39m[37m 2.8 | [39m[37m   3382.40 | [39m[37m 1052.46 |[39m
[37mexecution[39m[37m                            (13) | [39m[37m 1131.12 | [39m[37m  5.81 | [39m[37m 0.5 | [39m[37m      0.02 | [39m[37m  109.62 | [39m[37m 31.81 | [39m[37m 2.8 | [39m[37m  12604.10 | [39m[37m 1003.78 |[39m
[37mgpuarrays/base[39m[37m                        (6) | [39m[37m  315.41 | [39m[37m  0.01 | [39m[37m 0.0 | [39m[37m     17.44 | [39m[37m  101.75 | [39m[37m 13.27 | [39m[37m 4.2 | [39m[37m   4701.24 | [39m[37m 1510.96 |[39m
[37mcublas[39m[37

[91m[1mERROR: [22m[39mLoadError: [91mTest run finished with errors[39m
in expression starting at /home/ec2-user/.julia/packages/CUDA/k52QH/test/runtests.jl:490


LoadError: Package CUDA errored during testing