In [55]:
using Random
using Distributions: Gamma
using DataFrames, CSV
include("utils.jl")
include("statistics.jl")

_var (generic function with 2 methods)

In [71]:
# Settings for the permutation test.
pooled = false      # passed on as `pooled=` to the interval routines below
oneSided = true     # true: build the interval from two one-sided tests
alpha = 0.05        # nominal significance level before any one-sided adjustment

# Label used in the output file name, and the alternatives used when searching for
# the lower (`alt_lo`) and upper (`alt_hi`) end of the interval.
alternativeType = oneSided ? "oneSided" : "twoSided"
alt_lo, alt_hi = oneSided ? ("greater", "less") : ("twoSided", "twoSided")

# Each of the two one-sided tests is run at half the nominal level.
alpha = oneSided ? alpha / 2 : alpha

0.025

In [72]:
# Simulation dimensions
B = 100             # number of coverage probabilities per boxplot
S = 1800            # number of samples per coverage probability
nx, ny = 6, 6       # sizes of group 1 and group 2

# `partition` is defined outside this cell. `px` and `py` are later used in `pval`
# to index the combined sample (`combined[px]`, `combined[py]`).
px, py = partition(nx, ny)

dtype = Float32

Float32

In [73]:
# Generate data
Random.seed!(123)

# Group 1: B Gamma distributions with integer shape and scale drawn from 1:10
shapeX = rand(1:10, B)
scaleX = rand(1:10, B)
distrX = Gamma.(shapeX, scaleX)

# Group 2: B Gamma distributions with integer shape and scale drawn from 1000:1010
shapeY = rand(1000:1010, B)
scaleY = rand(1000:1010, B)
distrY = Gamma.(shapeY, scaleY)

# True difference in population means (a Gamma(shape, scale) has mean shape * scale)
deltas = @. (shapeX * scaleX) - (shapeY * scaleY)

# Produces B vectors of S * nx (resp. S * ny) elements.
# Each vector is drawn from its own distribution; the B distributions are not
# necessarily distinct from one another.
x = rand.(distrX, S * nx)
y = rand.(distrY, S * ny)

# Flatten into (B, S * n).
# `reduce(hcat, ...)` avoids splatting B arguments into `hcat`, and `permutedims`
# materializes the transpose so the reshape below returns a plain Array instead of
# a reshaped lazy adjoint (same values, cheaper indexing).
x = permutedims(reduce(hcat, x))
y = permutedims(reduce(hcat, y))

# reshape to 3D batch
x = reshape(x, (B, S, nx))
y = reshape(y, (B, S, ny))

# Compute t confidence intervals for each of the B x S sample pairs.
# The tiny alpha gives a wide interval and the large alpha a narrow one; their end
# points are later used as the two ends of each bracket handed to `search`.
wide   = tconf(x, y, alpha=0.00001, pooled=pooled)
narrow = tconf(x, y, alpha=0.4, pooled=pooled)

wide   = reshape(wide, B, S)
narrow = reshape(narrow, B, S)

100×1800 Matrix{Tuple{Float32, Float32}}:
 (-1.01801f6, -9.94906f5)  …  (-1.03184f6, -1.00108f6)
 (-1.00423f6, -9.79155f5)     (-1.02546f6, -1.00013f6)
 (-1.01172f6, -9.83759f5)     (-1.02642f6, -9.90234f5)
 (-1.0181f6, -9.95932f5)      (-1.02037f6, -9.90036f5)
 (-1.0411f6, -1.03103f6)      (-1.03132f6, -1.01038f6)
 (-1.01065f6, -993802.0)   …  (-1.00202f6, -9.73796f5)
 (-1.01233f6, -9.91354f5)     (-1.04439f6, -1.01714f6)
 (-1.01661f6, -9.94613f5)     (-1.00337f6, -9.893f5)
 (-1.00685f6, -9.88251f5)     (-1.01781f6, -9.95413f5)
 (-1.01544f6, -9.95905f5)     (-1.02347f6, -1.00633f6)
 (-9.96336f5, -9.81876f5)  …  (-9.98995f5, -9.84405f5)
 (-1.00474f6, -9.89105f5)     (-1.01513f6, -9.88519f5)
 (-1.02424f6, -1.00168f6)     (-1.00828f6, -9.89986f5)
 ⋮                         ⋱  
 (-1.01505f6, -9.81305f5)     (-1.01933f6, -1.00207f6)
 (-1.02937f6, -1.00868f6)     (-1.02804f6, -1.01996f6)
 (-1.0206f6, -1.00058f6)   …  (-1.03474f6, -1.0113f6)
 (-1.03646f6, -1.0117f6)      (-1.03498f6, -1.0027

In [74]:
"""
    pval(x, y, pooled=false, alternative="two_sided", delta=0)

Compute a permutation-test p-value for the null hypothesis that the difference in
means between group 1 and group 2 equals `delta`.

Group 1 is shifted by `delta`, the shifted data are combined with group 2, and the
combined sample is regrouped through the global index arrays `px` and `py`. The test
statistic `t` (defined outside this function) is evaluated on the observed pair and
on the regrouped samples.

# Arguments
- `x`: data for group 1.
- `y`: data for group 2.
- `pooled`: assume equal (`true`) or unequal (`false`) variances for the two groups;
  forwarded to `t`.
- `alternative`: type of alternative hypothesis. `"less"` counts statistics `<=` the
  observed one, `"greater"` counts statistics `>=` the observed one, and any other
  value is treated as two-sided.
- `delta`: null-hypothesis difference in means.

# Returns
The number of regrouped samples whose test statistic is as or more extreme than that
of the original pair `(x, y)`, divided by `size(px, 1)`.
"""
function pval(x, y, pooled=false, alternative="two_sided", delta=0)
    x_shift = x .- delta  # shift group 1 under null hypothesis
    t_obs = t(x_shift, y, pooled)  # test statistic for observed data
    combined = vcat(x_shift, y)  # join original pair into single vector
    xs = combined[px]   # regroup the combined sample using the global index arrays
    ys = combined[py]
    ts = t(xs, ys, pooled)   # test statistics for the regrouped samples

    if alternative == "less"
        n_extreme = count(ts .<= t_obs)
    elseif alternative == "greater"
        n_extreme = count(ts .>= t_obs)
    else
        # two-sided: extreme in either tail relative to |t_obs|
        n_extreme = count(@. (ts <= -abs(t_obs)) | (ts >= abs(t_obs)))
    end

    return n_extreme / size(px, 1)  # proportion of pairs w/ extreme test statistic
end

"""
    search(x, y, start, stop; pooled=false, alternative="two_sided",
           isLowerBound=true, margin=0.005, threshold=1.0, alpha=0.05)

Approximate one end point of a permutation-test confidence interval by interval
halving over the null difference `delta` in `[start, stop]`.

At each step the midpoint of the bracket is evaluated with `pval`, and the bracket is
narrowed towards the value of `delta` whose p-value equals `alpha`.

# Arguments
- `x`, `y`: data for group 1 and group 2, forwarded to `pval`.
- `start`, `stop`: ends of the initial bracket. Their p-values must lie on opposite
  sides of `alpha` (or equal it).

# Keywords
- `pooled`, `alternative`: forwarded to `pval`.
- `isLowerBound`: `true` when searching for the lower end of the interval, `false`
  for the upper end; it flips the direction in which the bracket is narrowed.
- `margin`: stop once the p-value is within `margin` of `alpha`.
- `threshold`: stop once the p-value changes by at most `threshold` percent between
  two consecutive midpoints.
- `alpha`: significance level.

# Returns
The last midpoint computed.

# Throws
- `AssertionError` if the p-values at `start` and `stop` lie strictly on the same
  side of `alpha`.
"""
function search(x, y, start, stop;
                pooled=false, alternative="two_sided", isLowerBound=true,
                margin=0.005, threshold=1.0, alpha=0.05)
    p_start = pval(x, y, pooled, alternative, start)
    p_end   = pval(x, y, pooled, alternative, stop)

    # p-values corresponding to `start` and `stop` must be on opposite sides of `alpha`
    @assert (p_start - alpha) * (p_end - alpha) <= 0 string(
        "p-values at start (", p_start, ") and stop (", p_end,
        ") do not bracket alpha (", alpha, ") for alternative ", alternative)

    p = p_new = delta = nothing
    # NaN or Inf when `old` is zero; the comparison with `threshold` is then false
    percent_change = (old, new) -> 100 * abs(new-old) / old

    while true
        delta = (start + stop) / 2

        if delta == start || delta == stop
            # (0) the bracket can no longer be halved in floating point: every further
            # midpoint would equal this one, so return it instead of looping on
            break
        end

        p_new = pval(x, y, pooled, alternative, delta)

        if !isnothing(p) && percent_change(p, p_new) <= threshold
            break  # (1) percent change in p-value is below `threshold`
        end

        # Equals (p_new - alpha) for a lower bound and (alpha - p_new) for an upper
        # bound, so a value above `margin` always means "move `stop` to the midpoint".
        compare = (alpha - p_new) - isLowerBound * 2 * (alpha - p_new)
        if margin < compare
            stop = delta
        elseif margin < -compare
            start = delta
        else
            break  # (2) p-value is within `margin` of `alpha`
        end

        p = p_new
    end

    return delta
end

"""
    permInterval(x, y, wide, narrow, delta_true, alt_lo, alt_hi; pooled=false, alpha=0.05)

Approximate a permutation-test confidence interval for the difference in means and
report whether it contains `delta_true`, together with its width.

The lower end is searched between the lower ends of `wide` and `narrow`, and the
upper end between the upper ends of `narrow` and `wide`, using `search`.

# Arguments
- `x`: data for group 1.
- `y`: data for group 2.
- `wide`: `(lo, hi)` pair used as the outer ends of the two search brackets.
- `narrow`: `(lo, hi)` pair used as the inner ends of the two search brackets.
- `delta_true`: difference in population means.
- `alt_lo`: alternative hypothesis passed to `search` for the lower end.
- `alt_hi`: alternative hypothesis passed to `search` for the upper end.

# Keywords
- `pooled`: assume pooled or unpooled variances.
- `alpha`: significance level passed to both searches.

# Returns
A 2-element vector `[covered, width]`. `covered` is `lo <= delta_true <= hi` and
`width` is `hi - lo`. Because both values are stored in one array literal, the `Bool`
is promoted to the numeric type of `width` (1 for true, 0 for false).
"""
function permInterval(x, y, wide, narrow, delta_true, alt_lo, alt_hi; pooled=false, alpha=0.05)
    wide_lo, wide_hi = wide
    narrow_lo, narrow_hi = narrow

    # use binary search to find approximate permutation test confidence interval
    lo = search(x, y, wide_lo, narrow_lo,
                pooled=pooled, alpha=alpha, alternative=alt_lo, isLowerBound=true)
    hi = search(x, y, narrow_hi, wide_hi,
                pooled=pooled, alpha=alpha, alternative=alt_hi, isLowerBound=false)
    return [(lo <= delta_true <= hi), hi - lo]
end

"""
    coverage(xs, ys, wide, narrow, delta_true, alt_lo, alt_hi; pooled=false, alpha=0.05)

Estimate the coverage probability and the average width of the permutation-test
confidence interval over a batch of sample pairs.

`permInterval` is applied to each pair of rows of `xs` and `ys`, together with the
matching entries of `wide` and `narrow`.

# Arguments
- `xs`, `ys`: matrices whose rows are the group 1 and group 2 samples.
- `wide`, `narrow`: collections of `(lo, hi)` pairs, one per row of `xs` / `ys`.
- `delta_true`: difference in population means, shared by all pairs.
- `alt_lo`, `alt_hi`: alternatives used for the lower and upper end of the interval.

# Keywords
- `pooled`, `alpha`: forwarded to `permInterval`.

# Returns
A tuple `(coverage, avg_CI_width)`: the fraction of intervals that contain
`delta_true`, and the mean interval width.
"""
function coverage(xs, ys, wide, narrow, delta_true, alt_lo, alt_hi;
                  pooled=false, alpha=0.05)
    results = permInterval.(eachrow(xs), eachrow(ys), wide, narrow, delta_true, alt_lo, alt_hi,
                            pooled=pooled, alpha=alpha)

    # Row 1 holds the coverage indicators and row 2 the widths, one column per pair.
    # `reduce(hcat, ...)` avoids splatting one argument per pair into `hcat`.
    stacked = reduce(hcat, results)

    # Divide by the number of pairs actually processed rather than the global `S`,
    # so the estimate stays correct for inputs with a different number of rows.
    n_pairs = size(stacked, 2)
    coverage_prob = sum(stacked[1, :]) / n_pairs
    avg_CI_width = mean(stacked[2, :])
    return coverage_prob, avg_CI_width
end

coverage (generic function with 1 method)

In [75]:
# Run `coverage` once per slice along the first dimension of `x` / `y`, paired with
# the matching rows of `wide` / `narrow` and entries of `deltas`, and report the
# elapsed time and allocations.
results = @time coverage.(
    eachslice(x, dims=1), eachslice(y, dims=1),
    eachrow(wide), eachrow(narrow),
    deltas, alt_lo, alt_hi;
    pooled=pooled, alpha=alpha,
)

 93.602898 seconds (163.78 M allocations: 514.700 GiB, 16.86% gc time, 0.26% compilation time)


100-element Vector{Tuple{Float32, Float32}}:
 (0.9611111, 95017.016)
 (0.9588889, 99104.89)
 (0.95055556, 96474.22)
 (0.9688889, 97479.555)
 (0.96, 95479.914)
 (0.9638889, 95827.12)
 (0.96, 97665.56)
 (0.96166664, 95868.81)
 (0.95555556, 96062.4)
 (0.96166664, 95432.016)
 (0.9688889, 98032.445)
 (0.9622222, 97379.414)
 (0.96166664, 97646.45)
 ⋮
 (0.955, 96271.2)
 (0.95555556, 94821.17)
 (0.9572222, 97363.85)
 (0.9572222, 97216.53)
 (0.9611111, 98741.74)
 (0.9583333, 98333.1)
 (0.9688889, 97574.516)
 (0.9627778, 96417.9)
 (0.9572222, 95129.76)
 (0.96944445, 99518.58)
 (0.9622222, 96546.45)
 (0.9638889, 98259.16)

In [76]:
# Split the (coverage, width) tuples into two columns and tabulate them next to the
# distributions each row was generated from.
probs  = first.(results)
widths = last.(results)
df = DataFrame(prob=probs, width=widths, distrX=distrX, distrY=distrY)

Unnamed: 0_level_0,prob,width,distrX,distrY
Unnamed: 0_level_1,Float32,Float32,Gamma…,Gamma…
1,0.961111,95017.0,"Gamma{Float64}(α=6.0, θ=6.0)","Gamma{Float64}(α=1008.0, θ=1001.0)"
2,0.958889,99104.9,"Gamma{Float64}(α=6.0, θ=1.0)","Gamma{Float64}(α=1008.0, θ=1010.0)"
3,0.950556,96474.2,"Gamma{Float64}(α=9.0, θ=6.0)","Gamma{Float64}(α=1007.0, θ=1007.0)"
4,0.968889,97479.6,"Gamma{Float64}(α=2.0, θ=2.0)","Gamma{Float64}(α=1004.0, θ=1010.0)"
5,0.96,95479.9,"Gamma{Float64}(α=6.0, θ=7.0)","Gamma{Float64}(α=1007.0, θ=1007.0)"
6,0.963889,95827.1,"Gamma{Float64}(α=4.0, θ=10.0)","Gamma{Float64}(α=1000.0, θ=1001.0)"
7,0.96,97665.6,"Gamma{Float64}(α=1.0, θ=3.0)","Gamma{Float64}(α=1002.0, θ=1010.0)"
8,0.961667,95868.8,"Gamma{Float64}(α=10.0, θ=10.0)","Gamma{Float64}(α=1000.0, θ=1003.0)"
9,0.955556,96062.4,"Gamma{Float64}(α=6.0, θ=7.0)","Gamma{Float64}(α=1005.0, θ=1006.0)"
10,0.961667,95432.0,"Gamma{Float64}(α=4.0, θ=6.0)","Gamma{Float64}(α=1006.0, θ=1009.0)"


In [77]:
# Encode the test configuration in the file name: <alternativeType>_<un|>pooled_<alpha>.csv
filename = string(alternativeType, "_", pooled ? "" : "un", "pooled", "_", alpha, ".csv")
CSV.write(filename, df)

"oneSided_unpooled_0.025.csv"

- overview/explain two-sample t-test
- derive t test confidence interval by inverting the t test of H0: delta = delta0 vs. H_A: delta != delta0, where delta = mu_1 - mu_2, delta0 in R
    - CI is the set of all deltas for which we do NOT reject the null == set of all deltas for which p-value > 0.05
    - set of all deltas for which |test statistic| < t quantile == set of all deltas for which |mean(x1) - mean(x2) - delta| / s.e. (standard error of the difference, pooled or unpooled) < t quantile
    - output after solving for delta is the CI
- mention that the test statistic for difference in means (for pooled variances) follows a t distribution
    - DoF also changes for unpooled variances
- permutation test
    - explain how permTest p-value is derived (detail interval halving, etc.)
    - similar to above t test overview, start with two-sided alternative, then explain for inverting two one-sided tests (here, use alpha/2 instead of alpha)
    - literature review
- fix DoF in unpooled tconf()