In [168]:
using Random
using Distributions
using DataFrames, CSV
include("statistics.jl")
include("simulation.jl")
include("data.jl")
include("util.jl")

partition (generic function with 1 method)

In [169]:
"""
CONFIG
"""

# test settings
pooled, isTwoSided = false, false
alpha = 0.05

# data dimensions
B, S = 5, 10       # B: coverage probabilities per boxplot; S: samples per probability
nx, ny = 6, 6      # sizes of group 1 and group 2
dtype = Float32

# distributions
# Seed the global RNG first so the shape draws below are reproducible.
Random.seed!(123)

distrTypeX = Gamma
distrTypeY = Gamma

# Draw order (X before Y) is kept fixed so the seeded values stay the same.
shapeX = rand(1:5, B)
shapeY = rand(1:5, B)

# Each scale is four times the matching shape.
scaleX = 4 .* shapeX
scaleY = 4 .* shapeY

5-element Vector{Int64}:
  8
  4
 20
 12
  8

In [170]:
# Build the simulated data set; `generateData` hands back seven values that are
# unpacked into the names used by the remaining cells.
x, y, wide, narrow, deltas, distrX, distrY = generateData(
    B, S, nx, ny,
    distrTypeX, (shapeX, scaleX),
    distrTypeY, (shapeY, scaleY),
)

# Sanity check: print the array sizes and the first distribution of each group.
@show(size(x), size(y), size(wide), size(narrow), size(deltas), distrX[1], distrY[1])

size(x) = (5, 10, 6)
size(y) = (5, 10, 6)
size(wide) = (5, 10)
size(narrow) = (5, 10)
size(deltas) = (5,)
distrX[1] = Gamma{Float64}(α=3.0, θ=12.0)
distrY[1] = Gamma{Float64}(α=2.0, θ=8.0)


Gamma{Float64}(α=2.0, θ=8.0)

In [171]:
# Bundle the arguments that stay constant across batches into one object, so a
# broadcasted call can hand the same values to every batch.

@enum Alternative less greater twoSided

"""
    Args(px, py, pooled, alpha, alt_lo, alt_hi)

Container for the constant arguments passed to every broadcasted call.

`px` and `py` are converted to `AbstractMatrix{Int}` and stored with their
concrete types as type parameters, so field access is type-stable. The
remaining arguments are converted to the declared field types (for example a
`Float64` `alpha` is stored as `Float32`).

# Fields
- `px`, `py`: integer matrices (in this notebook, the two results of
  `partition(nx, ny)`).
- `pooled`: Boolean flag taken from the notebook configuration.
- `alpha`: significance level, stored as `Float32`.
- `alt_lo`, `alt_hi`: `Alternative` values; presumably the alternatives used for
  the lower and upper bound respectively (TODO: confirm against `coverage`).
"""
struct Args{P<:AbstractMatrix{Int},Q<:AbstractMatrix{Int}}
    px::P
    py::Q
    pooled::Bool
    alpha::Float32
    alt_lo::Alternative
    alt_hi::Alternative

    # A single untyped inner constructor keeps the positional call
    # `Args(px, py, pooled, alpha, alt_lo, alt_hi)` working with convertible
    # arguments, which the default parametric constructor would reject.
    function Args(px, py, pooled, alpha, alt_lo, alt_hi)
        pxm = convert(AbstractMatrix{Int}, px)
        pym = convert(AbstractMatrix{Int}, py)
        return new{typeof(pxm),typeof(pym)}(pxm, pym, pooled, alpha, alt_lo, alt_hi)
    end
end

# Treat an `Args` value as a scalar under broadcasting: wrapping it in `Ref`
# means `f.(xs, args)` passes the whole object to every call instead of trying
# to iterate over it.
Base.Broadcast.broadcastable(args::Args) = Ref(args)

px, py = partition(nx, ny)

# Two-sided: full `alpha` with the `twoSided` alternative for both entries.
# Otherwise: `alpha / 2` with `greater` and `less`.
args = if isTwoSided
    Args(px, py, pooled, alpha, twoSided, twoSided)
else
    Args(px, py, pooled, alpha / 2, greater, less)
end

args

Args([1 2 … 5 6; 1 2 … 5 7; … ; 6 8 … 11 12; 7 8 … 11 12], [12 11 … 8 7; 12 11 … 8 6; … ; 7 5 … 2 1; 6 5 … 2 1], false, 0.025f0, greater, less)

In [172]:
# Time one broadcasted `coverage` call per slice along the first dimension.
# `args` is passed whole to every call because `broadcastable(::Args)` wraps
# it in a `Ref`.
@time results = coverage.(
    eachslice(x; dims=1),
    eachslice(y; dims=1),
    eachrow(wide),
    eachrow(narrow),
    deltas,
    args,
)

  0.415989 seconds (1.88 M allocations: 201.326 MiB, 5.32% gc time, 88.78% compilation time)


5-element Vector{Tuple{Float32, Float32}}:
 (0.9, 39.562084)
 (0.9, 37.43362)
 (1.0, 99.32058)
 (0.9, 36.45017)
 (0.8, 38.431763)

In [173]:
# Each result is a 2-tuple: the first entry goes to `prob`, the second to `width`.
probs = first.(results)
widths = last.(results)

# One row per result, alongside the distributions that generated the data.
df = DataFrame(; prob=probs, width=widths, distrX=distrX, distrY=distrY)

Unnamed: 0_level_0,prob,width,distrX,distrY
Unnamed: 0_level_1,Float32,Float32,Gamma…,Gamma…
1,0.9,39.5621,"Gamma{Float64}(α=3.0, θ=12.0)","Gamma{Float64}(α=2.0, θ=8.0)"
2,0.9,37.4336,"Gamma{Float64}(α=3.0, θ=12.0)","Gamma{Float64}(α=1.0, θ=4.0)"
3,1.0,99.3206,"Gamma{Float64}(α=5.0, θ=20.0)","Gamma{Float64}(α=5.0, θ=20.0)"
4,0.9,36.4502,"Gamma{Float64}(α=1.0, θ=4.0)","Gamma{Float64}(α=3.0, θ=12.0)"
5,0.8,38.4318,"Gamma{Float64}(α=3.0, θ=12.0)","Gamma{Float64}(α=2.0, θ=8.0)"


In [181]:
# Encode the run configuration in the file name:
# <oneSided|twoSided>_<pooled|unpooled>_<alpha>.csv
filename = let
    sides = isTwoSided ? "twoSided" : "oneSided"
    pooling = pooled ? "pooled" : "unpooled"
    string(sides, "_", pooling, "_", alpha, ".csv")
end

# Create the output directory if needed; writing into a missing directory fails.
mkpath("../results")
CSV.write("../results/" * filename, df)

"../results/oneSided_unpooled_0.05.csv"