Here we compare 4 testing schemes: Dubey & Muller, Energy statistic (Szekely & Rizzo 2004), HIPM, WoW.

In particular, we compare the power functions of these tests on the random probability measures used in Figure 1 of Dubey & Muller.

In [1]:
using Plots
using KernelDensity
using RCall # to call R functions



include("distributions.jl")

include("distances/new_distance.jl")
include("distances/distance_Wasserstein.jl")


ww (generic function with 2 methods)

# Testing scheme according to Dubey & Muller

In [2]:
function test_statistic_dm(μ_1::Vector{Float64}, μ_2::Vector{Float64})
    # Dubey & Muller two-sample test statistic computed from the means of Normal distributions.
    # Each probability measure is assumed to be Normal, so for the W_2 metric only the means are needed.
    # Inputs:
        # μ_1, μ_2 : the means of the first and of the second sample of Normal distributions.
        #            Both samples are treated as having length(μ_1) elements.
    # Output:
        # T_n : the value of the test statistic (Float64)

    group_size = length(μ_1)
    total = 2 * group_size # size of the pooled sample

    # deviations of every mean from the mean of its own group
    dev_1 = μ_1 .- mean(μ_1)
    dev_2 = μ_2 .- mean(μ_2)

    # within-group second moments of the deviations
    var_1 = mean(dev_1 .^ 2)
    var_2 = mean(dev_2 .^ 2)

    # fourth central moment minus squared second moment, per group
    avar_1 = mean(dev_1 .^ 4) - (var_1)^2
    avar_2 = mean(dev_2 .^ 4) - (var_2)^2

    # same quantities for the pooled sample
    pooled_mean = sum(μ_1 .+ μ_2) / total
    pooled_var = sum((μ_1 .- pooled_mean) .^ 2 .+ (μ_2 .- pooled_mean) .^ 2) / total

    F = pooled_var - var_1 / 2 - var_2 / 2 # pooled variance minus the average group variance
    U = (1 / 4) * ((var_1 - var_2)^2) / (avar_1 * avar_2) # scaled squared difference of group variances

    variance_term = total * U / (1 / (2 * avar_1) + 1 / (2 * avar_2))
    mean_term = total * (F^2) / (avar_1 / 4 + avar_2 / 4)

    return variance_term + mean_term
end








test_statistic_dm (generic function with 1 method)

In [3]:
function decide_dm_asympt(pms_1::Vector{Float64}, pms_2::Vector{Float64}, n_boostrap::Int, θ = 0.05)
    # This function implements the two-sample test according to Dubey & Muller, calibrated with the
    # asymptotic distribution of the test statistic.

    # Inputs:
        # pms_1, pms_2 : two samples of means of Normal distributions (they are passed unchanged to test_statistic_dm)
        # n_boostrap : not used by this function; it is kept in the signature, which mirrors decide_dm_boostrap
        # θ : significance level, default value is 0.05

    # Output:
        # 1.0 if the null hypothesis is rejected at level θ (T_n exceeds the threshold), 0.0 otherwise.

    T_n = test_statistic_dm(pms_1, pms_2) # test statistic

    # the threshold is the (1 - θ)-quantile of the chi-squared distribution with 1 degree of freedom
    threshold = quantile(Chisq(1), 1 - θ)
    return 1.0*(T_n > threshold)
end


function decide_dm_boostrap(pms_1::Vector{Float64}, pms_2::Vector{Float64}, n_boostrap::Int, θ = 0.05)
    # This function implements the two-sample test according to Dubey & Muller, calibrated with a
    # boostrap estimate of the quantile of the test statistic.

    # Inputs:
        # pms_1, pms_2 : two samples of means of Normal distributions with variance 1
        # n_boostrap : number of boostrap replicates used to estimate the quantile of the test statistic
        # θ : significance level, default value is 0.05

    # Output:
        # 1.0 if the null hypothesis is rejected at level θ (T_n exceeds the boostrap threshold), 0.0 otherwise.

    n_top = length(pms_1)
    T_n = test_statistic_dm(pms_1, pms_2) # test statistic on the observed samples

    # The pooled sample does not change between replicates, so it is built once outside the loop.
    allmeasures = vcat(pms_1, pms_2)

    T_n_boostrap = zeros(n_boostrap)
    for s in 1:n_boostrap
        # both boostrap samples are drawn with replacement from the pooled sample
        pms_1_boostrap = sample(allmeasures, n_top; replace=true)
        pms_2_boostrap = sample(allmeasures, n_top; replace=true)
        T_n_boostrap[s] = test_statistic_dm(pms_1_boostrap, pms_2_boostrap) # test statistic from boostraped sample
    end

    threshold = quantile(T_n_boostrap, 1-θ) # empirical (1 - θ)-quantile of the boostrap statistics
    return 1.0*(T_n > threshold)
end



function decide_denanova_from_r(mu_1::Vector{Float64}, mu_2::Vector{Float64};
                           sd::Float64 = 1.0, nq::Int = 51, qmin=0.01, qmax=0.99,
                           n_boostrap::Int = 1000, seed = 1234, θ::Float64 = 0.05)

    # This function implements the two-sample test according to Dubey & Muller by calling the function
    # DenANOVA from the R package "frechet" through RCall.
    # It returns 1 if the boostrap p-value reported by DenANOVA is below θ (null hypothesis rejected), and 0 otherwise.

    # Inputs:
        # mu_1, mu_2 : two samples of means of Normal distributions; all of them share the standard deviation sd.
        # sd : standard deviation of each Normal distribution, default value is 1.0, we do not change it.
        # nq : number of grid points to approximate quantile functions, default value is 51
        # qmin, qmax : min and max probability levels of the quantile grid, default values are 0.01 and 0.99
        # n_boostrap : number of boostrap replicates, passed to DenANOVA as R = n_boostrap
        # seed : random seed for R, default value is 1234. set.seed(seed) is executed on every call.
        # θ : significance level, default value is 0.05

    # copy the Julia values into R variables of the same names
    @rput mu_1 mu_2 sd nq qmin qmax n_boostrap seed

    # R side: evaluate the Normal quantile function of every measure on the grid qSup, label the two groups
    # and run DenANOVA with the boostrap option enabled; the boostrap p-value is stored in p_boot.
    R"""
    # if (!requireNamespace("frechet", quietly = TRUE)) {
    #   install.packages("frechet", repos="https://cloud.r-project.org")
    # }

    set.seed(seed)

    n1 <- length(mu_1)
    n2 <- length(mu_2)
    qSup <- seq(qmin, qmax, length.out = nq)

    Y1 <- lapply(seq_len(n1), function(i) qnorm(qSup, mean = mu_1[i], sd = sd))
    Y2 <- lapply(seq_len(n2), function(i) qnorm(qSup, mean = mu_2[i], sd = sd))

    Ly <- c(Y1, Y2)
    Lx <- qSup
    group <- c(rep(1, n1), rep(2, n2))

    res <- frechet::DenANOVA(qin = Ly, supin = Lx, group = group,
                    optns = list(boot = TRUE, R = n_boostrap))

    p_boot <- res$pvalBoot
    """
    # bring the boostrap p-value back into Julia
    @rget p_boot
    # 1 when p_boot < θ (reject), 0 otherwise
    return 1 * (p_boot < θ)
end




decide_denanova_from_r (generic function with 1 method)

# Testing scheme according to Energy statistic (Szekely & Rizzo 2004) 

In [4]:
function test_statistic_energy(pms_1::Vector{Normal}, pms_2::Vector{Normal})
    # Energy statistic (Szekely & Rizzo 2004) for two samples of probability measures that are
    # Normal distributions. Only the means of the distributions (field μ) enter the computation,
    # and the distance between two measures is the absolute difference of their means.

    # Inputs:
        # pms_1, pms_2 : two samples of probability measures of the same size, each a vector of Normal distributions
    # Output:
        # T_n : the energy distance between the two samples of means, multiplied by n / 2

    n = length(pms_1)
    @assert n == length(pms_2) "two samples of probability measures must have same size "

    X = [pm.μ for pm in pms_1] # means of the first sample
    Y = [pm.μ for pm in pms_2] # means of the second sample

    # average of |u_i - v_j| over all pairs (i, j)
    mean_abs_diff(u, v) = mean([abs(u_i - v_j) for u_i in u, v_j in v])

    between = mean_abs_diff(X, Y)
    within_X = mean_abs_diff(X, X)
    within_Y = mean_abs_diff(Y, Y)

    distance = 2 * between - within_X - within_Y
    return distance * n / 2
end



function decide_energy_boostrap(pms_1::Vector{Normal}, pms_2::Vector{Normal}, n_boostrap::Int, θ = 0.05)
    # This function implements the two-sample test according to Energy statistic (Szekely & Rizzo 2004),
    # calibrated with a boostrap estimate of the quantile of the test statistic.

    # Inputs:
        # pms_1, pms_2 : two samples of probability measures, each of which is a vector of Normal distributions
        # n_boostrap : number of boostrap replicates used to estimate the quantile of the test statistic
        # θ : significance level, default value is 0.05

    # Output:
        # 1.0 if the null hypothesis is rejected at level θ (T_n exceeds the boostrap threshold), 0.0 otherwise.

    n_top = length(pms_1)
    T_n = test_statistic_energy(pms_1, pms_2) # test statistic on the observed samples

    # The pooled sample does not change between replicates, so it is built once outside the loop.
    allmeasures = vcat(pms_1, pms_2)

    T_n_boostrap = zeros(n_boostrap)
    for s in 1:n_boostrap
        # both boostrap samples are drawn with replacement from the pooled sample
        pms_1_boostrap = sample(allmeasures, n_top; replace=true)
        pms_2_boostrap = sample(allmeasures, n_top; replace=true)
        T_n_boostrap[s] = test_statistic_energy(pms_1_boostrap, pms_2_boostrap) # test statistic from boostraped sample
    end

    threshold = quantile(T_n_boostrap, 1-θ) # empirical (1 - θ)-quantile of the boostrap statistics
    return 1.0*(T_n > threshold)
end

decide_energy_boostrap (generic function with 2 methods)

# Testing scheme according to HIPM and WoW

In [5]:
function get_thresholds_permutation_hipm_wow(q_1::PPM, q_2::PPM, n_top::Int, n_bottom::Int, n_permutations::Int, θ::Float64)
    # This function gets the thresholds for HIPM and WoW distances using permutation approach. It generates one
    # hierarchical sample from each of the two given RPMs and uses them for the permutation procedure.

    # Inputs:
        # q_1, q_2 : laws of two RPMs
        # n_top : number of atoms in hierarchical samples generated from RPMs
        # n_bottom : number of observations generated from each atom in hierarchical samples
        # n_permutations : number of permutations to approximate the quantile of the distance
        # θ : significance level (no default value)

    # Output:
        # threshold_hipm, threshold_wow : empirical (1 - θ)-quantiles of the permuted dlip and ww distances

    permuted_samples_dlip = zeros(n_permutations) # store samples of distances
    permuted_samples_ww = zeros(n_permutations) # store samples of distances

    hier_sample_1, hier_sample_2 = generate_emp(q_1, n_top, n_bottom), generate_emp(q_2, n_top, n_bottom)

    # common endpoints a, b taken over both hierarchical samples
    a = min(hier_sample_1.a, hier_sample_2.a)
    b = max(hier_sample_1.b, hier_sample_2.b)

    # The pooled rows do not change between permutations, so they are collected once outside the loop.
    total_rows = vcat(hier_sample_1.atoms, hier_sample_2.atoms)

    for k in 1:n_permutations
        random_indices = randperm(2*n_top) # random order in which pooled rows are split into two groups

        atoms_1 = total_rows[random_indices[1:n_top],:] # rows at the first n_top permuted indices
        atoms_2 = total_rows[random_indices[n_top+1:end],:] # rows at the remaining permuted indices

        hier_sample_1_permuted = emp_ppm(atoms_1, n_top, n_bottom, a, b)
        hier_sample_2_permuted = emp_ppm(atoms_2, n_top, n_bottom, a, b)

        permuted_samples_dlip[k] = dlip(hier_sample_1_permuted, hier_sample_2_permuted)
        permuted_samples_ww[k] = ww(hier_sample_1_permuted, hier_sample_2_permuted)
    end

    threshold_hipm = quantile(permuted_samples_dlip, 1 - θ)
    threshold_wow = quantile(permuted_samples_ww, 1 - θ)

    return threshold_hipm, threshold_wow
end
    





function get_thresholds_boostrap_hipm_wow(q_1::PPM, q_2::PPM, n_top::Int, n_bottom::Int, n_boostrap::Int, θ::Float64)
    # This function gets the thresholds for HIPM and WoW distances using boostrap approach. It generates one
    # hierarchical sample from each of the two given RPMs and uses them for the boostrap procedure.

    # Inputs:
        # q_1, q_2 : laws of two RPMs
        # n_top : number of atoms in hierarchical samples generated from RPMs
        # n_bottom : number of observations generated from each atom in hierarchical samples
        # n_boostrap : number of boostrap samples to approximate the quantile of the distance
        # θ : significance level (no default value)

    # Output:
        # threshold_hipm, threshold_wow : empirical (1 - θ)-quantiles of the boostraped dlip and ww distances

    boostrap_samples_dlip = zeros(n_boostrap) # store samples of distances
    boostrap_samples_ww = zeros(n_boostrap) # store samples of distances

    hier_sample_1, hier_sample_2 = generate_emp(q_1, n_top, n_bottom), generate_emp(q_2, n_top, n_bottom)

    # common endpoints a, b taken over both hierarchical samples
    a = min(hier_sample_1.a, hier_sample_2.a)
    b = max(hier_sample_1.b, hier_sample_2.b)

    # The pooled rows do not change between replicates, so they are collected once outside the loop.
    total_rows = vcat(hier_sample_1.atoms, hier_sample_2.atoms)

    for k in 1:n_boostrap
        # row indices drawn with replacement from the pooled hierarchical sample
        indices_1 = sample(1:2*n_top, n_top; replace = true)
        indices_2 = sample(1:2*n_top, n_top; replace = true)
        atoms_1 = total_rows[indices_1,:]
        atoms_2 = total_rows[indices_2,:]

        hier_sample_1_boostrap = emp_ppm(atoms_1, n_top, n_bottom, a, b)
        hier_sample_2_boostrap = emp_ppm(atoms_2, n_top, n_bottom, a, b)

        boostrap_samples_dlip[k] = dlip(hier_sample_1_boostrap, hier_sample_2_boostrap)
        boostrap_samples_ww[k] = ww(hier_sample_1_boostrap, hier_sample_2_boostrap)
    end

    threshold_hipm = quantile(boostrap_samples_dlip, 1 - θ)
    threshold_wow = quantile(boostrap_samples_ww, 1 - θ)

    return threshold_hipm, threshold_wow
end
    



get_thresholds_boostrap_hipm_wow (generic function with 1 method)

# Rejection rates for each of the method (DM, Energy stats, HIPM, WoW)


In [None]:
function rejection_rate(q_1::PPM, q_2::PPM, S::Int, n_top::Int, n_bottom::Int, n_boostrap::Int, θ::Float64, boostrap::Bool=true)
    # Rejection rates of 4 testing schemes (Dubey & Mueller, HIPM, WoW, Energy statistic) for two
    # given laws of RPM, q_1 and q_2, estimated over S simulated pairs of samples.

    # Input:
        # q_1, q_2 : laws of two RPMs
        # S : number of simulated pairs of samples
        # n_top : number of probability measures simulated from each q
        # n_bottom : number of random variables simulated from each probability measure
        # n_boostrap : number of boostrap replicates. For HIPM and WoW it is the number of permutations
        #              instead when boostrap is false; for DM and Energy statistic it is always the
        #              number of boostrap replicates.
        # θ : significance level (no default value)
        # boostrap : if true, thresholds for HIPM and WoW come from the boostrap approach, otherwise
        #            from the permutation approach. Default value is true.

    # Output (in this order):
        # rejection rate for Dubey & Mueller test, for HIPM test, for WoW test, for Energy statistic test

    # Thresholds for HIPM and WoW are computed only once per pair (q_1, q_2) and reused for all
    # S simulated hierarchical samples.
    threshold_function = boostrap ? get_thresholds_boostrap_hipm_wow : get_thresholds_permutation_hipm_wow
    threshold_hipm, threshold_wow = threshold_function(q_1, q_2, n_top, n_bottom, n_boostrap, θ)

    # number of rejections per testing scheme
    count_dm, count_hipm, count_wow, count_energy = 0.0, 0.0, 0.0, 0.0

    for _ in 1:S
        # n_top probability measures from each law
        pms_1 = generate_prob_measures(q_1, n_top)
        pms_2 = generate_prob_measures(q_2, n_top)

        # means of the generated probability measures
        mu_1 = [pm.μ for pm in pms_1]
        mu_2 = [pm.μ for pm in pms_2]

        # n_bottom observations from each generated probability measure
        hier_sample_1 = generate_emp(pms_1, n_top, n_bottom)
        hier_sample_2 = generate_emp(pms_2, n_top, n_bottom)

        # the endpoints of the two hierarchical samples might differ, so both get the common ones
        lower = min(hier_sample_1.a, hier_sample_2.a)
        upper = max(hier_sample_1.b, hier_sample_2.b)
        hier_sample_1.a = lower
        hier_sample_2.a = lower
        hier_sample_1.b = upper
        hier_sample_2.b = upper

        # record whether each testing scheme rejects
        count_dm += decide_denanova_from_r(mu_1, mu_2; n_boostrap=n_boostrap, θ = θ)
        count_hipm += (dlip(hier_sample_1, hier_sample_2) > threshold_hipm)
        count_wow += (ww(hier_sample_1, hier_sample_2) > threshold_wow)
        count_energy += decide_energy_boostrap(pms_1, pms_2, n_boostrap, θ)
    end

    return count_dm / S, count_hipm / S, count_wow / S, count_energy / S
end


rejection_rate (generic function with 2 methods)

In the simulations below we consider several pairs of laws of random probability measures (RPMs) for which we want to obtain rejection rates. Each pair of laws is indexed by a parameter. The code below records the rejection rates of every testing scheme for each value of that parameter.

In [7]:
function rejection_rates_per_parameter(δs::Vector{Float64}, rej_rate_function::Function)
    # Evaluates rej_rate_function at every parameter in δs and stacks the results row by row.
    # Each parameter δ defines two laws of RPMs, q_1 and q_2, whose rejection rates are computed
    # by rej_rate_function.

    # Input:
        # δs : vector of Float64 parameters
        # rej_rate_function : function which takes one parameter and returns the rejection rates
        #                     of the 4 testing schemes
    # Output:
        # matrix of size length(δs) x 4; row i holds the rejection rates for δs[i]

    n_schemes = 4
    rej_rates = zeros(length(δs), n_schemes)

    for (row, i) in zip(eachrow(rej_rates), eachindex(δs))
        println("parameter is $(δs[i])")
        row .= rej_rate_function(δs[i]) # fill the row of the current parameter
    end
    return rej_rates
end

rejection_rates_per_parameter (generic function with 1 method)

# Figure 1, Left, of the paper, varying mean, with permutation


Now we plot the power function per $\delta$ as it is in their paper for Figure 1


In [8]:
# Scratch cell: builds one pair of laws and one pair of hierarchical samples by hand.
# NOTE(review): n_boostrap = 1 and S = 1 look like smoke-test sizes — confirm before reusing these globals.
n_top, n_bottom = 100,2000
n_boostrap = 1
S = 1
θ = 0.05

# parameter that distinguishes q_2 from q_1 (first argument of tnormal_normal)
δ = 0.1

q_1 = tnormal_normal(0.0, 0.5, -10.0, 10.0)
q_2 = tnormal_normal(δ, 0.5, -10.0, 10.0)


# n_top probability measures drawn from each law (not used again in this cell)
prob_measures_1 = generate_prob_measures(q_1, n_top)
prob_measures_2 = generate_prob_measures(q_2, n_top)


# hierarchical samples drawn directly from the laws
q_1_emp = generate_emp(q_1, n_top, n_bottom)
q_2_emp = generate_emp(q_2, n_top, n_bottom)

# give both hierarchical samples the same endpoints a and b
a = minimum([q_1_emp.a, q_2_emp.a])
b = maximum([q_1_emp.b, q_2_emp.b])
q_1_emp.a = a
q_2_emp.a = a
q_1_emp.b = b
q_2_emp.b = b





5.096357709295051

To obtain the thresholds for HIPM and WoW when n_boostrap = 100,n_top = 100,n_bottom = 2000, we need 2.4 seconds per each boostrap. So 240 seconds in total

Once thresholds are obtained for each generated hierarchical samples we need 3 seconds to get decisions from all testing schemes (n_boost = 100).

Total time for S = 100 for one pair of measures is maximum 9 minutes





for n_bottom = 200, we need in total 103 seconds for thresholds for HIPM and WoW
Once thresholds are obtained we need 1.7 seconds for only one generated sample.
Total time in that case is 5 minutes

In [9]:
#s, n_bottom, δs

In [None]:
# n_top, n_bottom = 100,200
# n_permutation = 100
# S = 100
# θ = 0.05

# Random.seed!(1234) # for reproducibility

# # maybe variance should be sqrt(0.5) instead of 0.5 ???
# power_tnormal_normal(δ) = rejection_rate(tnormal_normal(0.0, 0.5, -10.0, 10.0), tnormal_normal(δ, 0.5, -10.0, 10.0),S, n_top, n_bottom, n_permutation, θ,false) # powers per δ for each testing scheme

# #δs = collect(-1.0:0.1:1.0)
# δs = [0.1]
# rejection_rates_tnormal_normal = rejection_rates_per_parameter(δs, power_tnormal_normal)
# fig_1 = plot(title = "Power of 4 testing schemes", xlabel = "δ", ylabel = "Power", xlims=(-1.0, 1.1), ylims = (-0.1, 1.1))
# plot!(fig_1, δs, rejection_rates_tnormal_normal[:,1], label = "dm", color = "red")
# plot!(fig_1, δs, rejection_rates_tnormal_normal[:,2], label = "hipm", color = "green")
# plot!(fig_1, δs, rejection_rates_tnormal_normal[:,3], label = "wow", color = "brown")
# plot!(fig_1, δs, rejection_rates_tnormal_normal[:,4], label = "Energy", color = "blue")
# filepath = joinpath(pwd(), "frechet/figure1")
# savefig(fig_1,joinpath(filepath, "power_tnormal_normal_varying_mean_n_top=$(n_top)_n_bottom=$(n_bottom)_S=$(S)_npermutation=$(n_permutation).png"))

# # # #minimum_power_dm = minimum(power_function_dm[])
# # # #hline!([minimum_power_dm], color=:red, linestyle=:dash, label="minimum power for dm")




parameter is 0.1
time taken to get thresholds is 96.7047369480133 seconds
time taken for S=100 is 181.14315676689148 seconds


"/Users/giorgikanchaveli/Desktop/Two-samples-test/frechet/figure1/power_tnormal_normal_varying_mean_n_top=100_n_bottom=200_S=100_npermutation=100.png"

# Figure 1, Left, of the paper, varying mean, with Boostrap


To obtain the thresholds for HIPM and WoW when n_boostrap = 100,n_top = 100,n_bottom = 2000, we need 2.4 seconds per each boostrap. So 240 seconds in total

Once thresholds are obtained for each generated hierarchical samples we need 3 seconds to get decisions from all testing schemes (n_boost = 100).

Total time for S = 100 for one pair of measures is maximum 9 minutes





for n_bottom = 200, we need in total 103 seconds for thresholds for HIPM and WoW
Once thresholds are obtained we need 1.7 seconds for only one generated sample.
Total time in that case is 5 minutes

In [None]:
# n_top, n_bottom = 100,2000
# n_boostrap = 100
# S = 100
# θ = 0.05

# Random.seed!(1234) # for reproducibility

# # maybe variance should be sqrt(0.5) instead of 0.5 ???
# power_tnormal_normal(δ) = rejection_rate(tnormal_normal(0.0, 0.5, -10.0, 10.0), tnormal_normal(δ, 0.5, -10.0, 10.0),S, n_top, n_bottom, n_boostrap, θ,true) # powers per δ for each testing scheme

# δs = collect(-1.0:0.1:1.0)
# rejection_rates_tnormal_normal = rejection_rates_per_parameter(δs, power_tnormal_normal)
# fig_1 = plot(title = "Power of 4 testing schemes", xlabel = "δ", ylabel = "Power", xlims=(-1.0, 1.1), ylims = (-0.1, 1.1))
# plot!(fig_1, δs, rejection_rates_tnormal_normal[:,1], label = "dm", color = "red")
# plot!(fig_1, δs, rejection_rates_tnormal_normal[:,2], label = "hipm", color = "green")
# plot!(fig_1, δs, rejection_rates_tnormal_normal[:,3], label = "wow", color = "brown")
# plot!(fig_1, δs, rejection_rates_tnormal_normal[:,4], label = "Energy", color = "blue")
# filepath = joinpath(pwd(), "frechet/figure1")
# savefig(fig_1,joinpath(filepath, "power_tnormal_normal_varying_mean_n_top=$(n_top)_n_bottom=$(n_bottom)_S=$(S)_nboostrap=$(n_boostrap).png"))

# # # #minimum_power_dm = minimum(power_function_dm[])
# # # #hline!([minimum_power_dm], color=:red, linestyle=:dash, label="minimum power for dm")




# Figure 1, Right of the paper, varying variance, with boostrap


To obtain the thresholds for HIPM and WoW when n_boostrap = 100,n_top = 100,n_bottom = 2000, we need 2.4 seconds per each boostrap. So 240 seconds in total

Once thresholds are obtained for each generated hierarchical samples we need 3 seconds to get decisions from all testing schemes (n_boost = 100).

Total time for S = 100 for one pair of measures is maximum 9 minutes





for n_bottom = 200, we need in total 103 seconds for thresholds for HIPM and WoW
Once thresholds are obtained we need 1.7 seconds for only one generated sample.
Total time in that case is 5 minutes

In [None]:
# n_top, n_bottom = 100,2000
# n_boostrap = 100
# S = 100
# θ = 0.05

# Random.seed!(1234) # for reproducibility

# # maybe variance should be sqrt(0.5) instead of 0.5 ???
# power_tnormal_normal(τ) = rejection_rate(tnormal_normal(0.0, 0.2, -10.0, 10.0), tnormal_normal(0.0, 0.2*τ, -10.0, 10.0),S, n_top, n_bottom, n_boostrap, θ, true) # powers per δ for each testing scheme

# τs = collect(0.1:0.1:3.0)
# rejection_rates_tnormal_normal = rejection_rates_per_parameter(τs, power_tnormal_normal)
# fig_2 = plot(title = "Power of 4 testing schemes", xlabel = "τ", ylabel = "Power", xlims=(0.0, 3.1), ylims = (-0.1, 1.1))
# plot!(fig_2, τs, rejection_rates_tnormal_normal[:,1], label = "dm", color = "red")
# plot!(fig_2, τs, rejection_rates_tnormal_normal[:,2], label = "hipm", color = "green")
# plot!(fig_2, τs, rejection_rates_tnormal_normal[:,3], label = "wow", color = "brown")
# plot!(fig_2, τs, rejection_rates_tnormal_normal[:,4], label = "Energy", color = "blue")
# filepath = joinpath(pwd(), "frechet/figure1")
# savefig(fig_2,joinpath(filepath, "power_tnormal_normal_varying_variance_n_top=$(n_top)_n_bottom=$(n_bottom)_S=$(S)_nboostrap=$(n_boostrap).png"))

# # # # #minimum_power_dm = minimum(power_function_dm[])
# # # # #hline!([minimum_power_dm], color=:red, linestyle=:dash, label="minimum power for dm")




# Figure 1, Right of the paper, varying variance, with permutation approach

To obtain the thresholds for HIPM and WoW when n_boostrap = 100,n_top = 100,n_bottom = 2000, we need 2.4 seconds per each boostrap. So 240 seconds in total

Once thresholds are obtained for each generated hierarchical samples we need 3 seconds to get decisions from all testing schemes (n_boost = 100).

Total time for S = 100 for one pair of measures is maximum 9 minutes





for n_bottom = 200, we need in total 103 seconds for thresholds for HIPM and WoW
Once thresholds are obtained we need 1.7 seconds for only one generated sample.
Total time in that case is 5 minutes

In [None]:
# n_top, n_bottom = 100,2000
# n_permutation = 100
# S = 100
# θ = 0.05

# Random.seed!(1234) # for reproducibility

# # maybe variance should be sqrt(0.5) instead of 0.5 ???
# power_tnormal_normal(τ) = rejection_rate(tnormal_normal(0.0, 0.2, -10.0, 10.0), tnormal_normal(0.0, 0.2*τ, -10.0, 10.0),S, n_top, n_bottom, n_permutation, θ, false) # powers per δ for each testing scheme

# τs = collect(0.1:0.1:3.0)
# rejection_rates_tnormal_normal = rejection_rates_per_parameter(τs, power_tnormal_normal)
# fig_2 = plot(title = "Power of 4 testing schemes", xlabel = "τ", ylabel = "Power", xlims=(0.0, 3.1), ylims = (-0.1, 1.1))
# plot!(fig_2, τs, rejection_rates_tnormal_normal[:,1], label = "dm", color = "red")
# plot!(fig_2, τs, rejection_rates_tnormal_normal[:,2], label = "hipm", color = "green")
# plot!(fig_2, τs, rejection_rates_tnormal_normal[:,3], label = "wow", color = "brown")
# plot!(fig_2, τs, rejection_rates_tnormal_normal[:,4], label = "Energy", color = "blue")
# filepath = joinpath(pwd(), "frechet/figure1")
# savefig(fig_2,joinpath(filepath, "power_tnormal_normal_varying_variance_n_top=$(n_top)_n_bottom=$(n_bottom)_S=$(S)_npermutation=$(n_permutation).png"))

# # # #minimum_power_dm = minimum(power_function_dm[])
# # # #hline!([minimum_power_dm], color=:red, linestyle=:dash, label="minimum power for dm")




# Example where DM should fail

Now let us run the same simulations on a different pair of laws of random probability measures:

Define  
$$
\widetilde{P}^1 = \text{Normal}(\widetilde{\mu}^1, 1) \quad \text{where} \quad \widetilde{\mu}^1 \sim \text{Normal}\!\left(1, \tfrac{1}{3}\right),
$$
$$
\widetilde{P}^2 = \text{Normal}(\widetilde{\mu}^2, 1) \quad \text{where} \quad \widetilde{\mu}^2 \sim \text{Uniform}\!\left(0, 2\right).
$$
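The two associated laws of RPM,
$$
Q^1 = \mathcal{L}\left(\widetilde{P}^1\right) \quad \text{and} \quad Q^2 = \mathcal{L}\left(\widetilde{P}^2\right),
$$
are different, but they induce the same Fréchet mean and the same Fréchet variance, because $\widetilde{\mu}^1$ and $\widetilde{\mu}^2$ both have mean $1$ and variance $\tfrac{1}{3}$.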

Then for each $\lambda \in [0,1]$ we consider the pair
$$
Q^1 \quad \text{and} \quad Q_{\lambda} = \lambda Q^1 + (1 - \lambda)Q^2.
$$
For every $\lambda < 1$ these are two different laws with the same Fréchet mean and variance; for $\lambda = 1$ the two laws coincide.

Below, we plot power functions as we vary $\lambda$.



# With Boostrap

To obtain the thresholds for HIPM and WoW when n_boostrap = 100,n_top = 100,n_bottom = 2000, we need 2.4 seconds per each boostrap. So 240 seconds in total

Once thresholds are obtained for each generated hierarchical samples we need 3 seconds to get decisions from all testing schemes (n_boost = 100).

Total time for S = 100 for one pair of measures is maximum 9 minutes





for n_bottom = 200, we need in total 103 seconds for thresholds for HIPM and WoW
Once thresholds are obtained we need 1.7 seconds for only one generated sample.
Total time in that case is 5 minutes

In [None]:
# Power of the 4 testing schemes on the Normal/Uniform counterexample, with boostrap thresholds
# for HIPM and WoW (last argument of rejection_rate is true).
n_top, n_bottom = 100,2000
n_boostrap = 100
S = 100
θ = 0.05

Random.seed!(1234) # for reproducibility

a, b = 0.0, 2.0
μ, σ = 1.0, sqrt(1/3)

mixture(λ) = mixture_ppm(normal_normal(μ,σ), uniform_normal(a,b), λ) # low lambda means more to second distribution which here is uniform normal
power_normal_uniform(λ) = rejection_rate(normal_normal(μ,σ), mixture(λ), S, n_top, n_bottom, n_boostrap, θ, true) 

λs = collect(0.0:0.1:1.0)
rejection_rates_normal_uniform = rejection_rates_per_parameter(λs, power_normal_uniform)

# columns of rejection_rates_normal_uniform: 1 = dm, 2 = hipm, 3 = wow, 4 = Energy
fig_counter_example = plot(title = "Power of 4 testing schemes", xlabel = "λ", ylabel = "Power", xlims=(-0.1, 1.1), ylims = (-0.1, 1.1))
plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,1], label = "dm", color = "red",marker = (:circle, 4))
plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,2], label = "hipm", color = "green",marker = (:circle, 4))
plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,3], label = "wow", color = "brown",marker = (:circle, 4))
plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,4], label = "Energy", color = "blue",marker = (:circle, 4))

filepath = joinpath(pwd(), "frechet/counterexample")
mkpath(filepath) # make sure the output directory exists, so the figure of a long simulation is not lost
savefig(fig_counter_example,joinpath(filepath, "power_normal_uniform_counter_example_boostrap_n_top=$(n_top)_n_bottom=$(n_bottom)_S=$(S)_nboostrap=$(n_boostrap).png"))





"/Users/giorgikanchaveli/Desktop/Two-samples-test/frechet/counterexample/power_normal_uniform_counter_example_boostrap_n_top=100_n_bottom=2000_S=1_nboostrap=1.png"

In [None]:
# fig_counter_example = plot(title = "Power of 4 testing schemes", xlabel = "λ", ylabel = "Power", xlims=(-0.1, 1.1), ylims = (-0.1, 1.1))
# scatter!(fig_counter_example, λs, rejection_rates_normal_uniform[:,1], label = "dm", color = "red")
# scatter!(fig_counter_example, λs, rejection_rates_normal_uniform[:,2], label = "hipm", color = "green")
# scatter!(fig_counter_example, λs, rejection_rates_normal_uniform[:,3], label = "wow", color = "brown")
# scatter!(fig_counter_example, λs, rejection_rates_normal_uniform[:,4], label = "Energy", color = "blue")


In [None]:
# fig_counter_example = plot(title = "Power of 4 testing schemes", xlabel = "λ", ylabel = "Power", xlims=(-0.1, 1.1), ylims = (-0.1, 1.1))
# plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,1], label = "dm", color = "red",marker = (:circle, 8))
# plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,2], label = "hipm", color = "green",marker = (:circle, 8))
# plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,3], label = "wow", color = "brown",marker = (:circle, 8))
# plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,4], label = "Energy", color = "blue",marker = (:circle, 8))

# filepath = joinpath(pwd(), "frechet/counterexample")
# savefig(fig_counter_example,joinpath(filepath, "power_normal_uniform_counter_example_boostrap_n_top=$(n_top)_n_bottom=$(n_bottom)_S=$(S)_nboostrap=$(n_boostrap).png"))

# With permutation

In [None]:
# Power of the 4 testing schemes on the Normal/Uniform counterexample, with permutation thresholds
# for HIPM and WoW (last argument of rejection_rate is false).
n_top, n_bottom = 100,2000
n_permutations = 100
S = 100
θ = 0.05

Random.seed!(1234) # for reproducibility

a, b = 0.0, 2.0
μ, σ = 1.0, sqrt(1/3)

mixture(λ) = mixture_ppm(normal_normal(μ,σ), uniform_normal(a,b), λ) # low lambda means more to second distribution which here is uniform normal
power_normal_uniform(λ) = rejection_rate(normal_normal(μ,σ), mixture(λ), S, n_top, n_bottom, n_permutations, θ, false)

λs = collect(0.0:0.1:1.0)
rejection_rates_normal_uniform = rejection_rates_per_parameter(λs, power_normal_uniform)

# columns of rejection_rates_normal_uniform: 1 = dm, 2 = hipm, 3 = wow, 4 = Energy
fig_counter_example = plot(title = "Power of 4 testing schemes", xlabel = "λ", ylabel = "Power", xlims=(-0.1, 1.1), ylims = (-0.1, 1.1))
plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,1], label = "dm", color = "red",marker = (:circle, 4))
plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,2], label = "hipm", color = "green",marker = (:circle, 4))
plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,3], label = "wow", color = "brown",marker = (:circle, 4))
plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,4], label = "Energy", color = "blue",marker = (:circle, 4))

filepath = joinpath(pwd(), "frechet/counterexample")
mkpath(filepath) # make sure the output directory exists, so the figure of a long simulation is not lost
savefig(fig_counter_example,joinpath(filepath, "power_normal_uniform_counter_example_permutation_n_top=$(n_top)_n_bottom=$(n_bottom)_S=$(S)_npermutations=$(n_permutations).png"))





# # #minimum_power_dm = minimum(power_function_dm[])
# # #hline!([minimum_power_dm], color=:red, linestyle=:dash, label="minimum power for dm")





parameter is 0.0
time taken to get thresholds is 0.05356287956237793 seconds
time taken for S=1 is 1.1255271434783936 seconds
parameter is 0.25
time taken to get thresholds is 0.49570202827453613 seconds
time taken for S=1 is 0.023925065994262695 seconds
parameter is 0.5
time taken to get thresholds is 0.017979145050048828 seconds
time taken for S=1 is 0.01970696449279785 seconds
parameter is 0.75
time taken to get thresholds is 0.014227867126464844 seconds
time taken for S=1 is 0.017826080322265625 seconds
parameter is 1.0
time taken to get thresholds is 0.014303922653198242 seconds
time taken for S=1 is 0.024312973022460938 seconds


"/Users/giorgikanchaveli/Desktop/Two-samples-test/frechet/counterexample/power_normal_uniform_counter_example_permutation_n_top=2_n_bottom=200_S=1_npermutations=1.png"

In [None]:
# fig_counter_example = plot(title = "Power of 4 testing schemes", xlabel = "λ", ylabel = "Power", xlims=(-0.1, 1.1), ylims = (-0.1, 1.1))
# scatter!(fig_counter_example, λs, rejection_rates_normal_uniform[:,1], label = "dm", color = "red")
# scatter!(fig_counter_example, λs, rejection_rates_normal_uniform[:,2], label = "hipm", color = "green")
# scatter!(fig_counter_example, λs, rejection_rates_normal_uniform[:,3], label = "wow", color = "brown")
# scatter!(fig_counter_example, λs, rejection_rates_normal_uniform[:,4], label = "Energy", color = "blue")


In [None]:
# fig_counter_example = plot(title = "Power of 4 testing schemes", xlabel = "λ", ylabel = "Power", xlims=(-0.1, 1.1), ylims = (-0.1, 1.1))
# plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,1], label = "dm", color = "red")
# plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,2], label = "hipm", color = "green")
# plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,3], label = "wow", color = "brown")
# plot!(fig_counter_example, λs, rejection_rates_normal_uniform[:,4], label = "Energy", color = "blue")


# filepath = joinpath(pwd(), "frechet/counterexample")
# savefig(fig_counter_example,joinpath(filepath, "power_normal_uniform_counter_example_permutation_n_top=$(n_top)_n_bottom=$(n_bottom)_S=$(S)_permutations=$(n_permutations).png"))

# Does it matter if n_permutations = 100 or 1000?


In [None]:
# function rej_rates_denanova(q_1::PPM, q_2::PPM, S::Int, n_top::Int, n_boostrap::Int, θ::Float64)
#     # obtain rejection rates of DenANOVA with bootstrap
#     # for given laws of RPMs q_1 and q_2
#     # S : number of times we simulate two samples from given laws of RPM
#     # n_top : number of probability measures we simulate from each q
#     # n_boostrap : number of times we repeat bootstrap procedure to estimate the quantile of the test statistic.
    
#     rej_rate_denanova = 0.0

#     println("law of RPMS are: $(q_1)")
#     println("$(q_2)")

#     for s in 1:S
#         mu_1, mu_2 = [pm_1.μ for pm_1 in generate_prob_measures(q_1, n_top)], [pm_2.μ for pm_2 in generate_prob_measures(q_2, n_top)]
#         println("s is $(s)")
#         rej_rate_denanova += denanova_from_r(mu_1, mu_2; n_boostrap=n_boostrap, θ = θ)
#     end
#     rej_rate_denanova /= S
#     return rej_rate_denanova
# end

In [None]:
# function rejections_anova_permutation_opt(δs::Vector{Float64})
#     rej_rate_denanova_1 = zeros(length(δs))
#     rej_rate_denanova_2 = zeros(length(δs))
#     for (i,δ) in enumerate(δs)
#         q_1, q_2 = tnormal_normal(0.0, 0.5, -10.0, 10.0), tnormal_normal(δ, 0.5, -10.0, 10.0)
#         rej_rate_denanova_1[i] = rej_rates_denanova(q_1, q_2, 1000, 100, 100, 0.05)
#         rej_rate_denanova_2[i] = rej_rates_denanova(q_1, q_2, 1000, 100, 1000, 0.05)
#     end
#     return rej_rate_denanova_1, rej_rate_denanova_2
# end


In [None]:
# n_top, n_bottom = 100,2000
# n_boostrap_1 = 100
# n_boostrap_2 = 1000
# S_1 = 1000
# S_2 = 1000
# θ = 0.05



# Random.seed!(1234) # for reproducibility
# δs = collect(-1.0:0.1:1.0)

# rej_rate_denanova_1, rej_rate_denanova_2 = rejections_anova_permutation_opt(δs)



In [None]:
# println("rejection rate 1 : $rej_rate_denanova_1")
# println("rejection rate 2 : $rej_rate_denanova_2")

In [None]:
# fig_denanova = plot(title = "Power of DenANOVA per diff n_boost", xlabel = "δ", ylabel = "Power", xlims=(-1.1, 1.1), ylims = (-0.1, 1.1))
# plot!(fig_denanova, δs, rej_rate_denanova_1, label = "n_boost = 100", color = "red")
# plot!(fig_denanova, δs, rej_rate_denanova_2, label = "n_boost = 1000", color = "blue")



In [None]:
# println("diff = ",rej_rate_denanova_2 - rej_rate_denanova_1)

In [None]:





# n_top, n_bottom = 100,2000
# n_boostrap = 100
# S = 100
# θ = 0.05

# Random.seed!(1234) # for reproducibility
# δs = collect(-1.0:0.1:1.0)
# rej_rate_denanova_per_delta, rej_rate_my_per_delta = zeros(length(δs)), zeros(length(δs))
# for (i, δ) in enumerate(δs)
#     q_1, q_2 = tnormal_normal(0.0, 0.5, -10.0, 10.0), tnormal_normal(δ, 0.5, -10.0, 10.0)

#     rej_rate_denanova, rej_rate_my = rej_rates_denanova_my(q_1, q_2, S, n_top, n_boostrap, θ)
#     rej_rate_denanova_per_delta[i] = rej_rate_denanova
#     rej_rate_my_per_delta[i] = rej_rate_my
# end
# fig_denanova_vs_my = plot(title = "Power of DenANOVA vs my implementation of DM", xlabel = "δ", ylabel = "Power", xlims=(-1.1, 1.1), ylims = (-0.1, 1.1))
# plot!(fig_denanova_vs_my, δs, rej_rate_denanova_per_delta, label = "DenANOVA", color = "red")
# plot!(fig_denanova_vs_my, δs, rej_rate_my_per_delta, label = "My DM", color = "blue")








In [None]:
# rej_rate_denanova_per_delta

In [None]:
# differences = abs.(rej_rate_my_per_delta .- rej_rate_denanova_per_delta)

In [None]:
# plot(δs, differences, title = "Difference between DenANOVA and my implementation of DM", xlabel = "δ",ylabel = "Absolute difference in power", xlims=(-1.1, 1.1), ylims = (-0.05, 0.2), label = "Absolute difference", color = "purple")

In [None]:
# function rej_rates_denanova_my(q_1::PPM, q_2::PPM, n_top::Int, n_bottom::Int, S::Int, n_boostrap::Int, θ::Float64)
#     # This function computes the rejection rates using DenANOVA from the R package "frechet" and my implementation of the Dubey & Muller test

#     # Inputs:
#         # q_1, q_2 : laws of two RPMs
#         # n_top : number of atoms in hierarchical samples generated from RPMs
#         # n_bottom : number of observations generated from each atom in hierarchical samples
#         # S : number of simulations to approximate the rejection rates
#         # n_boostrap: number of times we repeat bootstrap procedure to estimate the quantile of the test statistic
#         # θ : significance level, default value is 0.05
    
#     rej_rate_denanova, rej_rate_my = 0.0, 0.0
#     for s in 1:S
#         prob_measures_1 = generate_prob_measures(q_1, n_top) # generate n_top probability measures from law q_1
#         prob_measures_2 = generate_prob_measures(q_2, n_top) # generate n_top probability measures from law q_2
#         mu_1 = [pm.μ for pm in prob_measures_1] # collect means of probability measures from first sample
#         mu_2 = [pm.μ for pm in prob_measures_2] # collect means of probability measures from second sample

#         rej_rate_denanova += decide_denanova_from_r(mu_1, mu_2; n_boostrap=n_boostrap, θ = θ)
#         rej_rate_my += decide_dm_boostrap(mu_1, mu_2, n_boostrap, θ)
#     end
#     return rej_rate_denanova / S, rej_rate_my / S
# end 


