In [1]:
using Random
using Distributions
using DataFrames, CSV
using FLoops

# compile local files
include("statistics.jl")
include("simulation.jl")
include("data.jl")
include("util.jl")

using .TestStatistics
using .Simulation
using .Data

In [7]:
"""
CONFIG
"""
alpha = 0.05  # base level handed to Args; the one-sided runs below use alpha / 2

# data
B  = 100    # num. coverage probabilities per boxplot
S  = 1800   # num. samples per coverage probability
nx = 8      # size of group 1
ny = 8      # size of group 2
# `partition` is not defined in this file (presumably provided by util.jl — verify).
# Its two results are later stored in the `px` / `py` fields of Args.
px, py = partition(nx, ny)
dtype = Float32  # element type the sampled distribution parameters are converted to

# distribution settings
# Seed the global RNG so the parameter draws below are reproducible. The values depend
# on the draw order (X parameters first, then Y), so these lines must not be reordered.
Random.seed!(123)

# Group 1: B parameter pairs; the first parameter is drawn from Uniform(1, 5) and the
# second is four times the first.
distrTypeX = Gamma
shapeX = dtype.(rand(Uniform(1, 5), B))
scaleX = 4 * shapeX
paramsX = (shapeX, scaleX)

# Group 2: same recipe as group 1, using the next B draws from the RNG.
distrTypeY = Gamma
shapeY = dtype.(rand(Uniform(1, 5), B))
scaleY = 4 * shapeY
paramsY = (shapeY, scaleY)

(Float32[4.816637, 4.38358, 3.3469949, 1.4872506, 1.551065, 1.2032331, 2.5457604, 4.385833, 1.4941524, 3.960081  …  3.0071483, 4.7632217, 2.9845734, 3.390739, 1.5196867, 2.6299427, 3.167543, 4.1047573, 1.2867212, 4.7307343], Float32[19.266548, 17.53432, 13.3879795, 5.9490023, 6.20426, 4.8129325, 10.183042, 17.543331, 5.9766097, 15.840324  …  12.028593, 19.052887, 11.938293, 13.562956, 6.078747, 10.519771, 12.670172, 16.41903, 5.146885, 18.922937])

In [None]:
# The two blocks below are plain string literals, so nothing inside them is executed.
# They look like alternative distribution settings that can replace the Gamma settings
# of the CONFIG cell (same variable names: distrTypeX/paramsX, distrTypeY/paramsY).

# Alternative 1: Exponential, one parameter per group drawn from Uniform(1, 20).
"""
distrTypeX = Exponential
lambdaX = dtype.(rand(Uniform(1, 20), B))
paramsX = (lambdaX,)

distrTypeY = Exponential
lambdaY = dtype.(rand(Uniform(1, 20), B))
paramsY = (lambdaY,)
"""

# Alternative 2: LogNormal, first parameter drawn from Uniform(0, 1), second parameter
# set to twice the first.
"""
distrTypeX = LogNormal
muX = dtype.(rand(Uniform(0, 1), B))
sdX = 2 * muX
paramsX = (muX, sdX)

distrTypeY = LogNormal
muY = dtype.(rand(Uniform(0, 1), B))
sdY = 2 * muY
paramsY = (muY, sdY)
"""

In [9]:
# generate data
# Reseed the global RNG so the generated samples are reproducible on their own,
# independently of how many draws the configuration cell consumed.
Random.seed!(123)
# `generateData` is not defined in this file (presumably exported by the Simulation
# module — verify). It returns seven values that are unpacked in this order.
# NOTE(review): the meaning of the positional `true` flag is not visible here — confirm.
x, y, wide, narrow, deltas, distrX, distrY = generateData(B, S, nx, ny, true,
                                                          distrTypeX, paramsX,
                                                          distrTypeY, paramsY)
# Sanity checks: print the array shapes and the first delta / distribution pair.
@show size(x)
@show size(y)
@show size(wide)
@show size(narrow)
@show deltas[1]
@show distrX[1]
@show distrY[1]
x[1:5]  # cell result: the first five entries of x in linear (column-major) order

size(x) = (100, 1800, 8)
size(y) = (100, 1800, 8)
size(wide) = (100, 1800)
size(narrow) = (100, 1800)
deltas[1] = -7.230133f0
distrX[1] = Gamma{Float32}(α=4.6251984f0, θ=18.500793f0)
distrY[1] = Gamma{Float32}(α=4.816637f0, θ=19.266548f0)


5-element Vector{Float32}:
  53.98391
  20.56398
 101.49282
  32.297592
  21.075668

In [10]:
# Bundle the arguments that stay constant across batches into one object, so every
# batch can be handed a single value that carries all of them.

"""
    Args(px, py, pooled, alpha, alt_lo, alt_hi)

Immutable container for the settings that are identical for every batch.

# Fields
- `px`, `py`: integer matrices (any `AbstractMatrix{Int}`); the concrete matrix type is
  captured as a type parameter so that field access is type-stable.
- `pooled`: boolean flag forwarded unchanged to the consumer of this object.
- `alpha`: level, stored as `Float32` (other real inputs are converted).
- `alt_lo`, `alt_hi`: `Alternative` values used for the lower and upper side.

The positional constructor converts its inputs to the field types, so calls such as
`Args(px, py, true, 0.05, twoSided, twoSided)` with a `Float64` level keep working.
"""
struct Args{PX<:AbstractMatrix{Int},PY<:AbstractMatrix{Int}}
    px::PX
    py::PY
    pooled::Bool
    alpha::Float32
    alt_lo::Alternative
    alt_hi::Alternative
end

# Converting constructor. A parametric struct only gets an exactly-typed default outer
# constructor, so this method restores the "convert every argument" behaviour that the
# non-parametric definition had (e.g. accepting a Float64 `alpha`).
function Args(px, py, pooled, alpha, alt_lo, alt_hi)
    pxm = convert(AbstractMatrix{Int}, px)
    pym = convert(AbstractMatrix{Int}, py)
    return Args{typeof(pxm),typeof(pym)}(pxm, pym, pooled, alpha, alt_lo, alt_hi)
end

# Treat an Args value as a scalar under broadcasting: wrapping it in a `Ref` makes
# `f.(xs, args)` pass the same object to every element instead of iterating over it.
Base.Broadcast.broadcastable(obj::Args) = Ref(obj)

In [11]:
# Chunk size for the threaded executor: B divided over the available threads, rounded up.
basesize = ceil(Int, B / Threads.nthreads())

# Run the four configurations (two-/one-sided x pooled/unpooled) and write one CSV
# file per configuration into the current working directory.
@time for isTwoSided in (true, false)
    # Two-sided runs use `alpha` with `twoSided` on both ends; one-sided runs use
    # alpha / 2 with `greater` for the lower and `smaller` for the upper alternative.
    alpha_temp = isTwoSided ? alpha : alpha / 2
    alt_lo, alt_hi = isTwoSided ? (twoSided, twoSided) : (greater, smaller)

    for pooled in (true, false)
        args = Args(px, py, pooled, alpha_temp, alt_lo, alt_hi)

        # One slot per batch; each loop iteration writes only to its own index `b`.
        results = Vector{Any}(undef, B)
        @floop ThreadedEx(basesize = basesize) for b in 1:B
            results[b] = coverage(
                x[b, :, :], y[b, :, :], wide[b, :], narrow[b, :], deltas[b], args
            )
        end

        # Split the per-batch result pairs into two columns and tabulate them
        # next to the distributions they were generated from.
        probs = [prob for (prob, _) in results]
        widths = [width for (_, width) in results]
        df = DataFrame(prob = probs, width = widths, distrX = distrX, distrY = distrY)

        # File name pattern: <sidedness>_<pooling>_<alpha as stored in args>.csv
        filename = string(
            isTwoSided ? "twoSided" : "oneSided", "_",
            pooled ? "pooled" : "unpooled", "_",
            args.alpha, ".csv",
        )

        #CSV.write("../results/" * filename, df)
        CSV.write(filename, df)
    end
end

4024.597083 seconds (949.25 M allocations: 22.989 TiB, 33.83% gc time, 0.01% compilation time)


In [None]:
# Sequential (non-threaded) run of the two-sided, pooled configuration.
# Slots are filled one at a time, so whatever was computed before a failure stays
# available in `results`; the final line displays the raw per-batch results.
args = Args(px, py, true, 0.05, twoSided, twoSided)
results = Vector{Any}(undef, B)
foreach(eachindex(results)) do b
    results[b] = coverage(
        x[b, :, :], y[b, :, :], wide[b, :], narrow[b, :], deltas[b], args
    )
end
results