In [1]:
using Random
using Distributions
using DataFrames, CSV
include("statistics.jl")
include("simulation.jl")
include("data.jl")
include("util.jl")

partition (generic function with 1 method)

In [2]:
"""
CONFIG
"""

# settings forwarded to the analysis further down
pooled     = false
alpha      = 0.05
isTwoSided = false

# data dimensions
B     = 100     # num. coverage probabilities per boxplot
S     = 1800    # num. samples per coverage probability
nx    = 8       # size of group 1
ny    = 8       # size of group 2
dtype = Float32

# distributions
# Seed the global RNG first so the shape draws below are reproducible.
# shapeX must be drawn before shapeY to keep the random stream identical.
Random.seed!(123)

# group 1: B random integer shapes in 1:5; each scale is four times its shape
distrTypeX = Gamma
shapeX = rand(1:5, B)
scaleX = 4 .* shapeX

# group 2: drawn independently, in the same way as group 1
distrTypeY = Gamma
shapeY = rand(1:5, B)
scaleY = 4 .* shapeY

100-element Vector{Int64}:
 12
  4
 12
  4
 16
 20
  8
 20
 16
 12
  8
  8
 16
  ⋮
 16
 16
  4
  4
  4
 16
 16
 12
 12
 12
  4
  8

In [3]:
# Generate the data: `generateData` (not defined in this cell) receives the
# sizes B, S, nx, ny plus, for each group, the distribution type and its
# (shape, scale) parameter vectors.
x, y, wide, narrow, deltas, distrX, distrY = generateData(
    B, S, nx, ny,
    distrTypeX, (shapeX, scaleX),
    distrTypeY, (shapeY, scaleY),
)

# Print the shapes of the returned arrays and the first distribution of each
# group. `@show` prints one line per expression and returns the last value.
@show size(x) size(y) size(wide) size(narrow) size(deltas) distrX[1] distrY[1]

size(x) = (100, 1800, 8)
size(y) = (100, 1800, 8)
size(wide) = (100, 1800)
size(narrow) = (100, 1800)
size(deltas) = (100,)
distrX[1] = Gamma{Float64}(α=3.0, θ=12.0)
distrY[1] = Gamma{Float64}(α=3.0, θ=12.0)


Gamma{Float64}(α=3.0, θ=12.0)

In [4]:
# Bundle the arguments that stay constant across batches into one object, so
# that a single value can be handed to every batch of a broadcast call.

# Closed set of alternatives used to fill the `alt_lo` / `alt_hi` slots below.
# NOTE(review): presumably the direction of the alternative hypothesis —
# confirm against the code that consumes `Args`.
@enum Alternative less greater twoSided

# Constant arguments shared by all batches.
#
# The struct is parametrised on the concrete matrix types of `px` and `py`.
# Declaring those fields as the abstract `AbstractMatrix{Int}` would make every
# `args.px` / `args.py` access type-unstable in the code that consumes them.
#
# Fields:
#   px, py         integer matrices (presumably the index partitions returned
#                  by `partition` — TODO confirm)
#   pooled         flag passed through unchanged
#   alpha          level, stored as Float32
#   alt_lo, alt_hi `Alternative` values for the two slots
struct Args{PX<:AbstractMatrix{Int},PY<:AbstractMatrix{Int}}
    px::PX
    py::PY
    pooled::Bool
    alpha::Float32
    alt_lo::Alternative
    alt_hi::Alternative
end

# Converting constructor. It keeps `Args(px, py, pooled, alpha, lo, hi)` working
# for arguments that need conversion (e.g. a Float64 `alpha` or a non-`Int`
# integer matrix), as the non-parametric definition did.
function Args(px, py, pooled, alpha, alt_lo, alt_hi)
    pxc = convert(AbstractMatrix{Int}, px)
    pyc = convert(AbstractMatrix{Int}, py)
    return Args{typeof(pxc),typeof(pyc)}(pxc, pyc, pooled, alpha, alt_lo, alt_hi)
end

# `partition` is defined outside this cell; its two results fill the
# `px` / `py` fields of `Args`.
px, py = partition(nx, ny)

# Two-sided: the full `alpha` with `twoSided` in both alternative slots.
# Otherwise: `alpha` is halved and `greater` / `less` fill the lo / hi slots.
args = isTwoSided ?
    Args(px, py, pooled, alpha, twoSided, twoSided) :
    Args(px, py, pooled, alpha / 2, greater, less)

# Wrapping in `Ref` makes broadcasting treat an `Args` value as a scalar, so
# the same object is passed to every element of a dotted call.
function Base.Broadcast.broadcastable(a::Args)
    return Ref(a)
end
args

Args([1 2 … 7 8; 1 2 … 7 9; … ; 8 10 … 15 16; 9 10 … 15 16], [16 15 … 10 9; 16 15 … 10 8; … ; 9 7 … 2 1; 8 7 … 2 1], false, 0.025f0, greater, less)

In [5]:
# Broadcast `coverage` (defined outside this cell) over the first dimension of
# the data: one call per slice of `x` / `y`, per row of `wide` / `narrow`, and
# per element of `deltas`. `args` is passed unchanged to every call via the
# `broadcastable` method defined for `Args`. `@time` reports the cost of the
# whole broadcast and returns its value.
results = @time coverage.(
    eachslice(x, dims=1),
    eachslice(y, dims=1),
    eachrow(wide),
    eachrow(narrow),
    deltas,
    args,
)

1493.923841 seconds (308.72 M allocations: 5.592 TiB, 9.06% gc time, 0.18% compilation time)


100-element Vector{Tuple{Float32, Float32}}:
 (0.9066667, 35.776093)
 (0.85888886, 24.679276)
 (0.8888889, 59.28975)
 (0.9033333, 6.643051)
 (0.9, 46.236908)
 (0.87222224, 55.670696)
 (0.8761111, 14.079197)
 (0.8888889, 78.258385)
 (0.89666665, 46.271328)
 (0.88222224, 28.76648)
 (0.87333333, 28.460117)
 (0.8927778, 55.25492)
 (0.86388886, 38.454247)
 ⋮
 (0.8938889, 55.082275)
 (0.8811111, 40.6787)
 (0.85555553, 37.761208)
 (0.86944443, 53.11663)
 (0.86277777, 38.2919)
 (0.8611111, 37.792435)
 (0.88277775, 38.62868)
 (0.86833334, 24.852634)
 (0.895, 46.28684)
 (0.8838889, 59.354023)
 (0.8983333, 6.65358)
 (0.87333333, 40.38958)

In [8]:
# Split the result tuples into their two components (first -> prob,
# second -> width) and tabulate them next to the generating distributions.
probs  = getindex.(results, 1)
widths = getindex.(results, 2)
df = DataFrame(prob=probs, width=widths, distrX=distrX, distrY=distrY)

Unnamed: 0_level_0,prob,width,distrX,distrY
Unnamed: 0_level_1,Float32,Float32,Gamma…,Gamma…
1,0.906667,35.7761,"Gamma{Float64}(α=3.0, θ=12.0)","Gamma{Float64}(α=3.0, θ=12.0)"
2,0.858889,24.6793,"Gamma{Float64}(α=3.0, θ=12.0)","Gamma{Float64}(α=1.0, θ=4.0)"
3,0.888889,59.2897,"Gamma{Float64}(α=5.0, θ=20.0)","Gamma{Float64}(α=3.0, θ=12.0)"
4,0.903333,6.64305,"Gamma{Float64}(α=1.0, θ=4.0)","Gamma{Float64}(α=1.0, θ=4.0)"
5,0.9,46.2369,"Gamma{Float64}(α=3.0, θ=12.0)","Gamma{Float64}(α=4.0, θ=16.0)"
6,0.872222,55.6707,"Gamma{Float64}(α=2.0, θ=8.0)","Gamma{Float64}(α=5.0, θ=20.0)"
7,0.876111,14.0792,"Gamma{Float64}(α=1.0, θ=4.0)","Gamma{Float64}(α=2.0, θ=8.0)"
8,0.888889,78.2584,"Gamma{Float64}(α=5.0, θ=20.0)","Gamma{Float64}(α=5.0, θ=20.0)"
9,0.896667,46.2713,"Gamma{Float64}(α=3.0, θ=12.0)","Gamma{Float64}(α=4.0, θ=16.0)"
10,0.882222,28.7665,"Gamma{Float64}(α=2.0, θ=8.0)","Gamma{Float64}(α=3.0, θ=12.0)"


In [9]:
# Encode the run configuration in the file name:
# <sidedness>_<pooling>_<alpha>.csv, e.g. "oneSided_unpooled_0.025.csv".
filename = string(isTwoSided ? "twoSided" : "oneSided", "_",
                  pooled ? "pooled" : "unpooled", "_",
                  args.alpha, ".csv")

# Create the output directory first. Without it the write throws when the
# directory is missing, and the results of the long computation above are lost.
# `mkpath` does nothing if the directory already exists.
outdir = "../results"
mkpath(outdir)
CSV.write(joinpath(outdir, filename), df)

"../results/oneSided_unpooled_0.025.csv"