In [16]:
using Random
using Distributions
using FLoops
using Base.Threads
using BenchmarkTools

include("data.jl")
include("util.jl")

include("intervals/permutation.jl")
include("intervals/bootstrap.jl")
include("intervals/t.jl")

Threads.nthreads()

112

In [17]:
dtype = Float32
seed = 123

# DATA CONFIG

alpha = 0.05

# data
B  = 100   # num. coverage probabilities per boxplot
S  = 4300  # num. samples per coverage probability
nx = 200   # size of group 1
ny = 100    # size of group 2
nperm = try
    binomial(nx+ny, nx)
catch OverflowError
    Inf
end
    
if nperm > 30_000
    px, py = partition(nx, ny, 10_000)
else
    px, py = partition(nx, ny)
end

bits = vcat(ones(Int, nx), zeros(Int, ny))
addx, addy = bits[px], bits[py]

([1 0 … 0 1; 1 1 … 0 1; … ; 1 1 … 1 1; 1 1 … 1 1], [0 1 … 0 0; 0 1 … 0 1; … ; 0 1 … 0 1; 0 0 … 0 1])

In [18]:
# POPULATION SETTINGS

Random.seed!(123)

distrTypeX = LogNormal{dtype}
X_mu = random(Uniform(0, 1), B)
X_sigma = random(Uniform(0, 0.6), B)
distrX = map(distrTypeX, X_mu, X_sigma)

distrTypeY = Gamma{dtype}
Y_shape = @. 1 / (exp(X_sigma^2) - 1)
Y_scale = @. exp(X_mu + X_sigma^2 / 2) / Y_shape
distrY = map(distrTypeY, Y_shape, Y_scale)
;

In [19]:
deltas = @. mean(distrX) - mean(distrY)

@show distrX[1:2]
@show distrY[1:2]
@show deltas[1:2];

distrX[1:2] = LogNormal{Float32}[LogNormal{Float32}(μ=0.906f0, σ=0.572f0), LogNormal{Float32}(μ=0.443f0, σ=0.508f0)]
distrY[1:2] = Gamma{Float32}[Gamma{Float32}(α=2.5836008f0, θ=1.1279569f0), Gamma{Float32}(α=3.3964891f0, θ=0.5216751f0)]
deltas[1:2] = Float32[0.0, 0.0]


In [20]:
Random.seed!(123)
xs = [dtype.(rand(distrX[i], nx, S)) for i in 1:B]
ys = [dtype.(rand(distrY[i], ny, S)) for i in 1:B]
@show size(ys)
ys[2]

size(ys) = (100,)


100×4300 Matrix{Float32}:
 0.938853  1.00503   3.40909   1.0114    …  1.09361   1.27738   1.34061
 1.26308   1.71532   1.74553   2.44234      1.70738   1.46919   1.12799
 4.0256    2.48289   2.49671   2.18975      0.886978  1.2627    0.764371
 1.00635   0.282019  2.07133   1.60065      1.27084   2.55679   1.49512
 1.36648   2.65669   0.983595  1.67379      1.96715   1.91593   1.81058
 3.71419   2.5243    2.02421   1.93055   …  2.02448   1.30205   0.90291
 0.99162   1.3175    0.969833  2.30885      1.48678   1.1413    0.729581
 1.30761   1.89309   1.35754   0.351206     1.05594   1.91375   2.20303
 3.43932   1.2984    1.49306   2.02629      1.588     0.44524   2.25513
 1.89935   1.86558   2.72272   3.50044      1.27078   1.33801   0.689221
 1.0599    0.277075  2.2834    5.51918   …  2.95257   1.35119   1.61859
 2.5917    1.37948   3.58566   2.60645      1.9195    1.93142   1.72451
 0.495102  1.76342   1.25715   0.616498     3.14038   2.03003   1.84618
 ⋮                                 

In [21]:
# flatten into 3D matrix
X = reshape(hcat(xs...), nx, S, B)
Y = reshape(hcat(ys...), ny, S, B)
Y[:,:,2]

100×4300 Matrix{Float32}:
 0.938853  1.00503   3.40909   1.0114    …  1.09361   1.27738   1.34061
 1.26308   1.71532   1.74553   2.44234      1.70738   1.46919   1.12799
 4.0256    2.48289   2.49671   2.18975      0.886978  1.2627    0.764371
 1.00635   0.282019  2.07133   1.60065      1.27084   2.55679   1.49512
 1.36648   2.65669   0.983595  1.67379      1.96715   1.91593   1.81058
 3.71419   2.5243    2.02421   1.93055   …  2.02448   1.30205   0.90291
 0.99162   1.3175    0.969833  2.30885      1.48678   1.1413    0.729581
 1.30761   1.89309   1.35754   0.351206     1.05594   1.91375   2.20303
 3.43932   1.2984    1.49306   2.02629      1.588     0.44524   2.25513
 1.89935   1.86558   2.72272   3.50044      1.27078   1.33801   0.689221
 1.0599    0.277075  2.2834    5.51918   …  2.95257   1.35119   1.61859
 2.5917    1.37948   3.58566   2.60645      1.9195    1.93142   1.72451
 0.495102  1.76342   1.25715   0.616498     3.14038   2.03003   1.84618
 ⋮                                 

In [22]:
function save_ci_results(results, methodId, B, S, pooled=nothing, two_sided=nothing; prefix="", dir="./")
    averages = []

    for batchId in 1:B
        batch = results[methodId, batchId, :]
        coverage = sum([j for (j, _) in batch]) / S
        width = sum([j for (_, j) in batch]) / S
        push!(averages, (coverage, width))
    end
    if isnothing(two_sided)
        save(averages, distrX[1:B], distrY[1:B], alpha, prefix=prefix, dir=dir)
    else
        alpha_ = two_sided ? alpha : alpha / 2
        save(averages, distrX[1:B], distrY[1:B], alpha_, pooled, two_sided, prefix=prefix, dir=dir)
    end
end

function save_permutation_results(results, B, S; prefix="", dir="./")
    i = 1
    per_method = []
    for two_sided in [true, false]
        for pooled in [true, false]
            if i in [2, 4]  # use only unpooled for now
                save_ci_results(results, i, B, S, pooled, two_sided, prefix=prefix, dir=dir)
            end
            i += 1
        end
    end
end

save_permutation_results (generic function with 1 method)

In [23]:
function cache(groups, masks)
    mean_og = mean(groups, dims=1)
    var_og = var(groups, dims=1)
    nshift = sum(masks, dims=1)
    shift_sum = sum(groups .* masks, dims=1)
    return P(mean_og, var_og, nshift, shift_sum, size(groups, 1))
end

cache (generic function with 1 method)

In [25]:
x = X[:,1,1]
y = Y[:,1,1]
pooled = vcat(x, y)
xs = pooled[px]
ys = pooled[py]
xcache = cache(xs, addx)
ycache = cache(ys, addy)

wide, narrow = t_estimates(x, y, false)
@time permInterval(xcache, ycache, wide, narrow, false, alpha, twoSided, twoSided, 0.005)

  0.010364 seconds (2.03 k allocations: 13.605 MiB)


(-0.7104112703032146, 0.28976174179979663)

In [26]:
T = Threads.nthreads()
results = Array{Union{Tuple, Nothing}, 3}(nothing, 6, B, S)

#@time Threads.@threads for (i,j) in collect(Iterators.product(1:B, 1:S)) # 15.52 sec on (B,S) = (5, 1800)
@time @floop ThreadedEx(basesize=div(B*S, T)) for b in 1:10, s in 1:10
    @inbounds x = X[:,s,b]
    @inbounds y = Y[:,s,b]
    
    pooled = vcat(x, y)
    xs = @inbounds pooled[px]
    ys = @inbounds pooled[py]
    xcache = cache(xs, addx)
    ycache = cache(ys, addy)
    wide, narrow = t_estimates(x, y, false)
 
    #results[1, b, s] = permInterval(xcache, ycache, deltas[b], true, alpha, twoSided, twoSided, 0.0005)    
    results[2, b, s] = permInterval(xcache, ycache, wide, narrow, deltas[b], false, alpha, twoSided, twoSided, 0.0005)
    #results[3, b, s] = permInterval(xcache, ycache, deltas[b], true, alpha/2, greater, smaller, 0.0005)
    results[4, b, s] = permInterval(xcache, ycache, wide, narrow, deltas[b], false, alpha/2, greater, smaller, 0.0005)
    
    """
    results[5, b, s] = bootstrap(x, y, deltas[b], alpha, nsamples=10_000)
    results[6, b, s] = tconf(x, y, deltas[b], alpha, false)
    """
end

dir = "../results/" * string(nx) * "_" * string(ny) * "/2/"
save_permutation_results(results, B, S; dir=dir)
#save_ci_results(results, 6, B, S; prefix="bs", dir=dir)

  3.212737 seconds (875.33 k allocations: 5.528 GiB, 9.33% gc time, 7.34% compilation time)


"../results/200_100/2/"