In [1]:
using Random
using Distributions
using FLoops
using Base.Threads
using BenchmarkTools

# Project-local sources.
# NOTE(review): presumably these define helpers used further down that are not
# defined in this notebook (e.g. `partition`, `random`, `save`, `P`) — confirm.
include("data.jl")
include("util.jl")

# Confidence-interval implementations.
# NOTE(review): presumably the source of `permInterval`, `bootstrap`, `tconf`
# and `t_estimates` — confirm against the included files.
include("intervals/permutation.jl")
include("intervals/bootstrap.jl")
include("intervals/t.jl")

# Show how many threads this Julia session has available; the simulation loop
# below uses this number to size its work chunks.
Threads.nthreads()

8

In [21]:
# Global configuration for the coverage simulation.
dtype = Float32
seed = 123

# DATA CONFIG

alpha = 0.05

# data
B  = 100   # num. coverage probabilities per boxplot
S  = 4300  # num. samples per coverage probability
nx = 8   # size of group 1
ny = 9    # size of group 2

# Number of ways to pick group 1 out of the pooled sample. `binomial` throws an
# `OverflowError` when the count does not fit in an `Int`; only that case is
# mapped to `Inf` ("too many to enumerate"). Every other exception is rethrown:
# writing `catch OverflowError` would NOT filter by type, it would bind any
# exception to a local variable of that name and silently swallow it.
nperm = try
    binomial(nx + ny, nx)
catch err
    err isa OverflowError || rethrow()
    Inf
end
    
# Choose the index matrices that assign pooled observations to the two groups.
# Above 30_000 possible partitions the three-argument form of `partition` is
# used after seeding the RNG from the notebook-wide `seed` (instead of a second
# hard-coded literal), otherwise the two-argument form is used.
# NOTE(review): presumably the third argument is the number of randomly drawn
# partitions and the two-argument form enumerates all of them — confirm.
if nperm > 30_000
    Random.seed!(seed)
    px, py = partition(nx, ny, 10_000)
else
    px, py = partition(nx, ny)
end

# 1 for the first nx pooled positions (original group 1), 0 for the rest;
# indexing with px / py gathers those flags per partition.
bits = vcat(ones(Int, nx), zeros(Int, ny))
addx, addy = bits[px], bits[py]
px[:, 1:5]

8×5 Matrix{Int64}:
 1  1   1   1   1
 2  2   2   2   2
 3  3   3   3   3
 4  4   4   4   4
 5  5   5   5   5
 6  6   6   6   6
 7  7   7   7   7
 8  9  10  11  12

In [22]:
# POPULATION SETTINGS
#
# Build B pairs of population distributions (X, Y). Both use the same
# distribution family and the same scale; Y's location is X's location plus an
# offset drawn from Uniform(-5, 5).
# NOTE(review): `random` is not defined in this notebook; presumably a project
# helper that draws B values from the given distribution — confirm.

# Seed from the notebook-wide `seed` rather than repeating the literal.
Random.seed!(seed)

distrTypeX = Laplace{dtype}
X_mu = random(Uniform(0, 1), B)
X_sigma = random(Uniform(2, 4), B)
distrX = map(distrTypeX, X_mu, X_sigma)

distrTypeY = Laplace{dtype}
Y_mu = X_mu .+ random(Uniform(-5, 5), B)
Y_sigma = X_sigma  # same scale as X (shares the array, no copy)
distrY = map(distrTypeY, Y_mu, Y_sigma)
;

In [23]:
# Difference of the population means (X minus Y), one entry per population pair.
deltas = mean.(distrX) .- mean.(distrY)

# Print the first two populations and their deltas as a sanity check.
@show distrX[1:2]
@show distrY[1:2]
@show deltas[1:2];

distrX[1:2] = Laplace{Float32}[Laplace{Float32}(μ=0.906f0, θ=3.908f0), Laplace{Float32}(μ=0.443f0, θ=3.692f0)]
distrY[1:2] = Laplace{Float32}[Laplace{Float32}(μ=5.556f0, θ=3.908f0), Laplace{Float32}(μ=-3.8500001f0, θ=3.692f0)]
deltas[1:2] = Float32[-4.65, 4.293]


In [24]:
# Draw the simulation data: for each of the B population pairs, S samples of
# size nx (group 1) and ny (group 2), stored one sample per column and
# converted to `dtype`.
# Seed from the notebook-wide `seed` rather than repeating the literal.
Random.seed!(seed)
xs = [dtype.(rand(distrX[i], nx, S)) for i in 1:B]
ys = [dtype.(rand(distrY[i], ny, S)) for i in 1:B]
@show size(ys)
ys[2]

size(ys) = (100,)


9×4300 Matrix{Float32}:
  1.21238   -12.0656    -5.72501   …  -3.54165  -2.86373   -3.68552
 -4.94164   -13.4604    -7.78351      -5.8675   -3.98819   -9.06254
 -1.21379    -3.74062   -3.03806      -7.25893  -3.16679   -5.64835
 -2.43914     1.42707   -4.50381      -3.06727  -4.6536    -3.30118
  1.13805     0.859749  -1.74957       3.21269  -6.97413   -5.29728
 -2.06508    -1.35375    0.841032  …  -4.97212  -4.14999   -1.05894
  0.891501   -6.42806   -0.44811      -1.40522  -3.59966  -28.5312
 -4.14019    -7.35408   -5.63809      -4.13824  -2.57561    4.7373
  1.71894    16.3879    -4.05828      -3.65615  -9.83951   -4.64191

In [25]:
# Stack the B per-population sample matrices side by side and reinterpret the
# result as a 3-D array indexed as [observation, sample, population].
X = reshape(reduce(hcat, xs), nx, S, B)
Y = reshape(reduce(hcat, ys), ny, S, B)
Y[:,:,2]

9×4300 Matrix{Float32}:
  1.21238   -12.0656    -5.72501   …  -3.54165  -2.86373   -3.68552
 -4.94164   -13.4604    -7.78351      -5.8675   -3.98819   -9.06254
 -1.21379    -3.74062   -3.03806      -7.25893  -3.16679   -5.64835
 -2.43914     1.42707   -4.50381      -3.06727  -4.6536    -3.30118
  1.13805     0.859749  -1.74957       3.21269  -6.97413   -5.29728
 -2.06508    -1.35375    0.841032  …  -4.97212  -4.14999   -1.05894
  0.891501   -6.42806   -0.44811      -1.40522  -3.59966  -28.5312
 -4.14019    -7.35408   -5.63809      -4.13824  -2.57561    4.7373
  1.71894    16.3879    -4.05828      -3.65615  -9.83951   -4.64191

In [27]:
# Average the per-sample result tuples of one method and pass them to `save`.
#
# For every batch `1:B`, the first and second tuple components stored in
# `results[methodId, batch, :]` are summed and divided by `S`, giving one
# `(coverage, width)` pair per batch.
#
# When `two_sided` is `nothing`, `save` is called with the global `alpha` and
# without `pooled` / `two_sided`. Otherwise it is called with `alpha` for the
# two-sided case and `alpha / 2` for the one-sided case, plus both flags.
# Reads the globals `distrX`, `distrY` and `alpha`; returns whatever `save`
# returns.
function save_ci_results(results, methodId, B, S, pooled=nothing, two_sided=nothing; prefix="", dir="./")
    # Mean over `S` of the k-th component of every tuple in one batch.
    component_mean(batch, k) = sum([entry[k] for entry in batch]) / S

    averages = Any[]
    for batchId in 1:B
        batch = results[methodId, batchId, :]
        push!(averages, (component_mean(batch, 1), component_mean(batch, 2)))
    end

    if two_sided === nothing
        return save(averages, distrX[1:B], distrY[1:B], alpha; prefix=prefix, dir=dir)
    end

    alpha_ = two_sided ? alpha : alpha / 2
    return save(averages, distrX[1:B], distrY[1:B], alpha_, pooled, two_sided; prefix=prefix, dir=dir)
end

# Save the averaged results of the permutation-interval methods.
#
# Method ids are assigned by counting through the (two_sided, pooled)
# combinations with `pooled` varying fastest:
#   1 = two-sided/pooled, 2 = two-sided/unpooled,
#   3 = one-sided/pooled, 4 = one-sided/unpooled.
# Only the unpooled variants (ids 2 and 4) are saved for now. `prefix` and
# `dir` are forwarded to `save_ci_results`. Returns `nothing`.
function save_permutation_results(results, B, S; prefix="", dir="./")
    methodId = 0
    for two_sided in (true, false), pooled in (true, false)
        methodId += 1
        pooled && continue  # use only unpooled for now
        save_ci_results(results, methodId, B, S, pooled, two_sided; prefix=prefix, dir=dir)
    end
    return nothing
end

save_permutation_results (generic function with 1 method)

In [28]:
# Precompute per-column summaries of `groups` and bundle them in a `P`.
#
# All reductions run along the first dimension: the mean and variance of
# `groups`, the sum of `masks`, and the sum of `groups` weighted element-wise
# by `masks`. The number of rows of `groups` is passed as the last field.
# NOTE(review): `P` is defined outside this notebook; field meanings are taken
# from the argument order used here — confirm against its definition.
function cache(groups, masks)
    nrows = size(groups, 1)
    masked = groups .* masks
    return P(
        mean(groups; dims=1),
        var(groups; dims=1),
        sum(masks; dims=1),
        sum(masked; dims=1),
        nrows,
    )
end

cache (generic function with 1 method)

In [30]:
# Time one permutation interval on the first sample of the first population.
x = X[:,1,1]
y = Y[:,1,1]
pooled = vcat(x, y)
# Use dedicated names for the permuted groups: assigning to `xs` / `ys` here
# would overwrite the global per-population sample lists that the flattening
# cell reads, so re-running that cell afterwards would fail.
xperm = pooled[px]
yperm = pooled[py]
xcache = cache(xperm, addx)
ycache = cache(yperm, addy)

# NOTE(review): the `false` flags presumably select the unpooled variant —
# confirm against `t_estimates` / `permInterval`.
wide, narrow = t_estimates(x, y, false)
@time permInterval(xcache, ycache, wide, narrow, false, alpha, twoSided, twoSided, 0.005)

  0.022929 seconds (1.91 k allocations: 30.788 MiB)


(-6.576183463748455, 7.559701057227282)

In [32]:
# Full simulation: for every population pair `b` and sample `s`, run the
# enabled interval methods and store each returned tuple in `results`.
T = Threads.nthreads()
# One slot per (method, batch, sample), pre-filled with `nothing`. Only rows 5
# (bootstrap) and 6 (t interval) are written below; rows 1-4 belong to the
# permutation variants, which are currently commented out and stay `nothing`.
results = Array{Union{Tuple, Nothing}, 3}(nothing, 6, B, S)

# Earlier Threads.@threads variant kept for its timing note:
#@time Threads.@threads for (i,j) in collect(Iterators.product(1:B, 1:S)) # 15.52 sec on (B,S) = (5, 1800)
# NOTE(review): basesize is div(B*S, T), which is 0 whenever B*S < T — confirm
# that ThreadedEx accepts that or clamp it to at least 1.
@time @floop ThreadedEx(basesize=div(B*S, T)) for b in 1:B, s in 1:S
    # Column slices copy the observations of sample `s` from population `b`.
    @inbounds x = X[:,s,b]
    @inbounds y = Y[:,s,b]
    
    # Permutation methods 1-4 (disabled for this run).
#     pooled = vcat(x, y)
#     xs = @inbounds pooled[px]
#     ys = @inbounds pooled[py]
#     xcache = cache(xs, addx)
#     ycache = cache(ys, addy)
#     wide, narrow = t_estimates(x, y, false)
 
#     results[1, b, s] = permInterval(xcache, ycache, deltas[b], true, alpha, twoSided, twoSided, 0.0005)    
#     results[2, b, s] = permInterval(xcache, ycache, wide, narrow, deltas[b], false, alpha, twoSided, twoSided, 0.0005)
#     results[3, b, s] = permInterval(xcache, ycache, deltas[b], true, alpha/2, greater, smaller, 0.0005)
#     results[4, b, s] = permInterval(xcache, ycache, wide, narrow, deltas[b], false, alpha/2, greater, smaller, 0.0005)
    
    #"""
    # Each (b, s) pair writes its own slots, so iterations do not share entries.
    results[5, b, s] = bootstrap(x, y, deltas[b], alpha, nsamples=10_000)
    results[6, b, s] = tconf(x, y, deltas[b], alpha, false)
    #"""
end

# Output directory is keyed by the two group sizes.
# NOTE(review): the trailing "4" looks like a run / experiment id — confirm.
dir = "../results/" * string(nx) * "_" * string(ny) * "/4/"
# save_permutation_results(results, B, S; dir=dir)
save_ci_results(results, 5, B, S; prefix="bs", dir=dir)
save_ci_results(results, 6, B, S; prefix="t", dir=dir)

432.660652 seconds (8.63 G allocations: 803.014 GiB, 21.69% gc time, 0.03% compilation time)
dir * filename = "../results/8_9/4/bs_0.05.csv"
dir * filename = "../results/8_9/4/t_0.05.csv"


"../results/8_9/4/t_0.05.csv"

In [None]:
# Spot check: mean of the first tuple component (coverage) of method 4 over the
# first `s` samples, for each of the first `b` batches.
b = 10
s = 100
map(1:b) do batchId
    sum(hit for (hit, _) in results[4, batchId, 1:s]) / s
end

10-element Vector{Float64}:
 0.95
 0.98
 0.95
 0.94
 0.95
 0.93
 0.95
 0.92
 0.94
 0.97