In [1]:
using Pkg

Pkg.add("Plots")

[33m[1m│ [22m[39m  exception = RequestError: Could not resolve host: pkg.julialang.org while requesting https://pkg.julialang.org/registries
[33m[1m└ [22m[39m[90m@ Pkg.Registry C:\Users\User\.julia\juliaup\julia-1.11.6+0.x64.w64.mingw32\share\julia\stdlib\v1.11\Pkg\src\Registry\Registry.jl:77[39m
[32m[1m    Updating[22m[39m registry at `C:\Users\User\.julia\registries\General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `C:\Users\User\.julia\environments\v1.11\Project.toml`
[32m[1m  No Changes[22m[39m to `C:\Users\User\.julia\environments\v1.11\Manifest.toml`


In [2]:
Pkg.add("Tulip")

[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `C:\Users\User\.julia\environments\v1.11\Project.toml`
[32m[1m  No Changes[22m[39m to `C:\Users\User\.julia\environments\v1.11\Manifest.toml`


In [3]:
using Plots


# Load the project-local source files. The PPM type, the samplers and the distance
# functions used below are presumably defined in these files — TODO confirm.
include("distributions.jl")
include("structures.jl")
include("distances/w_distance.jl") # not needed?
include("distances/new_distance.jl")
include("distances/distance_Wasserstein.jl")


ww (generic function with 2 methods)

In [4]:
"""
    sample_distances_thresholds(q_1, q_2, n, m, s, θs, n_permutations)

Sample distances between hierarchical empirical measures and, for every sample,
estimate rejection thresholds with the permutation approach.

# Arguments
- `q_1`: law of the random probability measure Q^1.
- `q_2`: law of the random probability measure Q^2.
- `n`: number of rows in a hierarchical sample.
- `m`: number of columns in a hierarchical sample.
- `s`: number of distances sampled.
- `θs`: probability levels for which thresholds are computed.
- `n_permutations`: number of permutations used per sample.

# Returns
`(d_wws, d_lips, perm_thresholds)` where `d_wws[i]` and `d_lips[i]` are the values of
`ww` and `dlip` on the `i`-th pair of empirical measures, and `perm_thresholds` has size
`(length(θs), s, 2)`: first index is the probability level, second the sample, third the
distance function (1 = WoW, 2 = HIPM).
"""
function sample_distances_thresholds(q_1::PPM, q_2::PPM, n::Int, m::Int, s::Int, θs::Vector{Float64}, n_permutations::Int)
    d_wws = Vector{Float64}(undef, s)
    d_lips = Vector{Float64}(undef, s)

    perm_thresholds = zeros(length(θs), s, 2)

    # Quantile levels do not depend on the sample, so compute them once.
    quantile_levels = 1 .- θs

    for i in 1:s
        println("s = $i")
        emp_1, emp_2 = generate_emp(q_1, n, m), generate_emp(q_2, n, m) # Q^1_{n,m}, Q^2_{n,m}
        d_wws[i] = ww(emp_1, emp_2)
        d_lips[i] = dlip(emp_1, emp_2)

        # The threshold obtained here depends on the generated empirical measures (unlike a
        # threshold derived from Rademacher complexity): distances between randomly
        # re-split pooled rows serve as approximate samples of d(Q^1_{n,m}, Q^2_{n,m}).

        # Pool the rows of both samples once per sample; the pool is the same for every
        # permutation. Assumes each `atoms` field has n rows — TODO confirm.
        total_rows = vcat(emp_1.atoms, emp_2.atoms)

        permuted_samples_ww = zeros(n_permutations)
        permuted_samples_dlip = zeros(n_permutations)

        for k in 1:n_permutations
            random_indices = randperm(2n) # assignment of pooled rows to the two new measures

            emp_1_permuted = emp_ppm(total_rows[random_indices[1:n], :], n, m, emp_1.a, emp_1.b)
            emp_2_permuted = emp_ppm(total_rows[random_indices[n+1:end], :], n, m, emp_2.a, emp_2.b)

            permuted_samples_ww[k] = ww(emp_1_permuted, emp_2_permuted)
            permuted_samples_dlip[k] = dlip(emp_1_permuted, emp_2_permuted)
        end

        # Thresholds per distance and probability level. These are not exactly the
        # theoretical thresholds because no rescaling by √(n/2) is applied.
        perm_thresholds[:, i, 1] = quantile(permuted_samples_ww, quantile_levels) # WoW
        perm_thresholds[:, i, 2] = quantile(permuted_samples_dlip, quantile_levels) # HIPM
    end

    return d_wws, d_lips, perm_thresholds
end

sample_distances_thresholds (generic function with 1 method)

In [5]:
"""
    rejection_rate(d_wws, d_lips, perm_thresholds, θs)

Compute, for every probability level, the fraction of sampled distances that exceed
their own permutation threshold.

`perm_thresholds[i, j, k]` is read as the threshold at level `i` for sample `j` and
distance `k` (1 = WoW, 2 = dlip). Only the length of `θs` is used.

Returns a `length(θs) × 2` matrix: column 1 holds the WoW rates, column 2 the dlip rates.
"""
function rejection_rate(d_wws::Vector{Float64}, d_lips::Vector{Float64}, perm_thresholds::Array{Float64,3}, θs::Vector{Float64})
    n_samples = length(d_wws)
    rates = zeros(length(θs), 2)

    for level in axes(rates, 1)
        # Per-sample cutoffs at this probability level.
        ww_cutoffs = perm_thresholds[level, :, 1]
        dlip_cutoffs = perm_thresholds[level, :, 2]

        rates[level, 1] = count(d_wws .> ww_cutoffs) / n_samples
        rates[level, 2] = count(d_lips .> dlip_cutoffs) / n_samples
    end

    return rates
end



rejection_rate (generic function with 1 method)

Now let us write a function that, given the laws of two random probability measures and the sample sizes, produces plots of the empirical thresholds versus the permutation thresholds and of the rejection rates. For debugging purposes it also returns the sampled distances and the thresholds.
  

In [6]:
"""
    simulate_perm(q_1, q_2, n, m, s, θs, n_permutations, samemeasures)

Run the permutation experiment and build its diagnostic plots.

# Arguments
- `q_1`, `q_2`: laws of the random probability measures Q^1 and Q^2.
- `n`, `m`: number of rows / columns in a hierarchical sample.
- `s`: number of distances sampled.
- `θs`: probability levels for thresholds.
- `n_permutations`: number of permutations for the permutation approach.
- `samemeasures`: `true` when Q^1 = Q^2; the rate plot is then titled as false positive
  rates and gets a dashed diagonal, otherwise it is titled as true positive rates.

# Returns
`(d_wws, d_lips, perm_thresholds, rej_rates, emp_vs_perm_plot, rate_plot)`, where
`emp_vs_perm_plot` is a two-element vector (WoW panel, HIPM panel).
"""
function simulate_perm(q_1::PPM, q_2::PPM, n::Int, m::Int, s::Int, θs::Vector{Float64}, n_permutations::Int, samemeasures::Bool)
    d_wws, d_lips, perm_thresholds = sample_distances_thresholds(q_1, q_2, n, m, s, θs, n_permutations)

    # Empirical thresholds are quantiles of the sampled distances themselves.
    levels = 1 .- θs
    empirical_ww = quantile(d_wws, levels)
    empirical_dlip = quantile(d_lips, levels)

    # perm_thresholds has size (length(θs), s, 2); only the thresholds obtained from the
    # first sample are shown in the comparison plots.
    first_perm_ww = perm_thresholds[:, 1, 1]
    first_perm_dlip = perm_thresholds[:, 1, 2]

    # One panel comparing empirical and permutation thresholds for a single distance.
    function threshold_panel(empirical, permutation, heading)
        panel = plot()
        plot!(panel, θs, empirical, label="Empirical", xlabel="Probability Level", ylabel="Threshold", title=heading)
        plot!(panel, θs, permutation, label="Permutation", linestyle=:dash)
        return panel
    end

    panel_ww = threshold_panel(empirical_ww, first_perm_ww, "Empirical vs Permutation Thresholds WoW")
    panel_dlip = threshold_panel(empirical_dlip, first_perm_dlip, "Empirical vs Permutation Thresholds HIPM")
    emp_vs_perm_plot = [panel_ww, panel_dlip]

    # Rejection rates per probability level and the corresponding plot.
    rej_rates = rejection_rate(d_wws, d_lips, perm_thresholds, θs)

    rate_plot = plot()
    if samemeasures
        # Reference diagonal, drawn only for the false-positive plot.
        plot!(rate_plot, [0, 1], [0, 1], linestyle=:dash, color=:gray,label=false)
    end
    axis_label, heading = samemeasures ? ("fp", "False Positive Rates") : ("tp", "True Positive Rates")
    for (col, curve_name, shade) in ((1, "WoW", "green"), (2, "HIPM", "blue"))
        plot!(rate_plot, θs, rej_rates[:, col], label=curve_name, xlabel="Probability Level", ylabel=axis_label, title=heading, ratio = 1.0, xlims = (0,1), ylims = (0,1),color = shade)
    end

    return d_wws, d_lips, perm_thresholds, rej_rates, emp_vs_perm_plot, rate_plot
end

simulate_perm (generic function with 1 method)

In [7]:
# # define laws of random probability measures

# n_1 = 10
# a,b = 0.0,1.0
# mu_1 = (rand(n_1) .+ 5).*2
# mu_2 = mu_1 .+ rand(n_1) / 5
# sigma_1 = rand(n_1) .+ 1
# sigma_2 = sigma_1 .+ rand(n_1) / 5


# q_discr_normal_1 = discr_normal(n_1, mu_1, sigma_1, a, b)
# q_discr_normal_2 = discr_normal(n_1, mu_2, sigma_2, a, b)



# # define parameters for generating hierarchical empirical measures

# s = 20 # number of times to sample distances between hierarchical empirical measurse
# n = 40
# m = 2

# # simulation & threshld parameters

# θs = collect(0.0:0.01:1.0) # probability levels for which we get thresholds
# n_permutations = 50


# t = time()
# d_wws, d_lips, perm_thresholds, rej_rates, emp_vs_perm_plot, rate_plot = simulate_perm(q_discr_1, q_discr_2, n, m, s, θs, n_permutations, false)
# t = time() - t
# println("time taken: $t seconds")


In [8]:

# q_discr_1 = discrrpm(10,10,0.0,1.0) 
# q_discr_2 = discrrpm(10,10,0.0,1.0)

# s = 50 # number of times to sample distances between hierarchical empirical measurse
# n = 50
# m = 10
# θs = collect(0.0:0.01:1.0) # probability levels for which we get thresholds
# n_permutations = 50

# t = time()
# d_wws, d_lips, perm_thresholds, rej_rates, emp_vs_perm_plot, rate_plot = simulate_perm(q_discr_1, q_discr_1, n, m, s, θs, n_permutations, true)
# t = time() - t
# println("time taken: $t seconds")


In [9]:
# plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))

In [10]:
# rate_plot

In [11]:
# # define laws of random probability measures

# n_1 = 10
# a,b = 0.0,1.0
# mu_1 = (rand(n_1) .+ 5).*2
# mu_2 = mu_1 .+ rand(n_1) / 5
# sigma_1 = rand(n_1) .+ 1
# sigma_2 = sigma_1 .+ rand(n_1) / 5


# q_discr_normal_1 = discr_normal(n_1, mu_1, sigma_1, a, b)
# q_discr_normal_2 = discr_normal(n_1, mu_2, sigma_2, a, b)



# # define parameters for generating hierarchical empirical measures

# s = 1 # number of times to sample distances between hierarchical empirical measurse
# n = 40
# m = 2

# # simulation & threshld parameters

# θs = collect(0.0:0.01:1.0) # probability levels for which we get thresholds
# n_permutations = 50


# t = time()
# d_wws, d_lips, perm_thresholds, rej_rates, emp_vs_perm_plot, rate_plot = simulate_perm(q_discr_1, q_discr_2, n, m, s, θs, n_permutations, false)
# t = time() - t
# println("time taken: $t seconds")


In [12]:
"""
    savefig_discr_normal(n_1, a, b, n, m, s, θs, n_permutations, samemeasures)

Run `simulate_perm` for laws built with `discr_normal` and save the threshold-comparison
plot and the rejection-rate plot as PNG files under `newplots/discr_normal` in the
current working directory. The directory is created if it does not exist.

# Arguments
- `n_1`: number of atoms of Q^1 and Q^2 (the atoms are normal distributions).
- `a`, `b`: forwarded to `discr_normal`.
- `n`, `m`: number of rows / columns in a hierarchical sample.
- `s`: number of distances sampled.
- `θs`: probability levels for thresholds.
- `n_permutations`: number of permutations for the permutation approach.
- `samemeasures`: if `true`, the same law is used for both Q^1 and Q^2.

Returns the value of the last `savefig` call.
"""
function savefig_discr_normal(n_1::Int, a::Float64, b::Float64, n::Int, m::Int, s::Int, θs::Vector{Float64}, n_permutations::Int, samemeasures::Bool)
    # Create the output directory before the (long) simulation so that a missing
    # directory cannot make `savefig` fail after all the work is done.
    filepath = joinpath(pwd(), "newplots/discr_normal")
    mkpath(filepath)

    # All four parameter vectors are drawn even when `samemeasures` is true, which keeps
    # the random stream independent of that flag.
    mu_1 = (rand(n_1) .+ 5).*2
    mu_2 = mu_1 .+ rand(n_1) / 5
    sigma_1 = rand(n_1) .+ 1
    sigma_2 = sigma_1 .+ rand(n_1) / 5

    q_discr_normal_1 = discr_normal(n_1, mu_1, sigma_1, a, b)
    q_discr_normal_2 = samemeasures ? q_discr_normal_1 : discr_normal(n_1, mu_2, sigma_2, a, b)

    _, _, _, _, emp_vs_perm_plot, rate_plot = simulate_perm(q_discr_normal_1, q_discr_normal_2, n, m, s, θs, n_permutations, samemeasures)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_discr_normal_n1=$(n_1)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures=$(samemeasures).png"))
    return savefig(rate_plot, joinpath(filepath, "rejection_rates_discr_normal_n1=$(n_1)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures=$(samemeasures).png"))
end

savefig_discr_normal (generic function with 1 method)

In [13]:
"""
    savefig_discr_discr(n_1, n_2, a, b, n, m, s, θs, n_permutations, samemeasures)

Run `simulate_perm` for laws built with `discrrpm` and save the threshold-comparison plot
and the rejection-rate plot as PNG files under `newplots/discr_discr` in the current
working directory. The directory is created if it does not exist.

# Arguments
- `n_1`: number of atoms of Q^1 and Q^2; the atoms are discrete probability measures.
- `n_2`: number of atoms of each inner discrete probability measure (the same for all
  atoms of Q^1 and Q^2).
- `a`, `b`: the inner probability measures live on `[a, b]`.
- `n`, `m`: number of rows / columns in a hierarchical sample.
- `s`: number of distances sampled.
- `θs`: probability levels for thresholds.
- `n_permutations`: number of permutations for the permutation approach.
- `samemeasures`: if `true`, the same law is used for both Q^1 and Q^2.

Returns the value of the last `savefig` call.
"""
function savefig_discr_discr(n_1::Int, n_2::Int, a::Float64, b::Float64, n::Int, m::Int, s::Int, θs::Vector{Float64}, n_permutations::Int, samemeasures::Bool)
    # Create the output directory before the (long) simulation so that a missing
    # directory cannot make `savefig` fail after all the work is done.
    filepath = joinpath(pwd(), "newplots/discr_discr")
    mkpath(filepath)

    q_discr_1 = discrrpm(n_1,n_2,a,b)
    # A second, independently generated law is only drawn when the measures should differ.
    q_discr_2 = samemeasures ? q_discr_1 : discrrpm(n_1,n_2,a,b)

    _, _, _, _, emp_vs_perm_plot, rate_plot = simulate_perm(q_discr_1, q_discr_2, n, m, s, θs, n_permutations, samemeasures)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_discr_discr_n1=$(n_1)_n2=$(n_2)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures=$(samemeasures).png"))
    return savefig(rate_plot, joinpath(filepath, "rejection_rates_discr_discr_n1=$(n_1)_n2=$(n_2)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures=$(samemeasures).png"))
end

savefig_discr_discr (generic function with 1 method)

The methods above do not save ROC curves, so we define new functions that also do that.

In [14]:
# simulations for discr_normal


# n_1 = 10
# a, b = 0.0, 1.0
# \mu_1  = 2*(rand(n_1) + 4)
# \mu_2 = \mu_1
# n = 40
# m = 2
# s = 30
# θs = collect(0.0:0.01:1.0)
# n_permutations = 30


In [15]:
# # Simulations for Discr_discr

# n_1 = 12
# n_2 = 12
# a, b = 0.0, 1.0
# n = 1
# m = 2
# s = 80
# θs = collect(0.0:0.01:1.0)
# n_permutations = 75

# savefig_discr_discr(n_1, n_2, a, b, 80, 2, s, θs, n_permutations, false)
# savefig_discr_discr(n_1, n_2, a, b, 80, 2, s, θs, n_permutations, true)
# savefig_discr_discr(n_1, n_2, a, b, 80, 20, s, θs, n_permutations, false)
# savefig_discr_discr(n_1, n_2, a, b, 80, 20, s, θs, n_permutations, true)





In [16]:
# # simulations for discr_normal

# n_1 = 12
# a, b = 0.0, 1.0


# n = 1
# m = 2
# s = 80
# θs = collect(0.0:0.01:1.0)
# n_permutations = 75


# savefig_discr_normal(n_1, a, b, 80, 2, s, θs, n_permutations, false)
# savefig_discr_normal(n_1, a, b, 80, 2, s, θs, n_permutations, true)
# savefig_discr_normal(n_1, a, b, 80, 20, s, θs, n_permutations, false)
# savefig_discr_normal(n_1, a, b, 80, 20, s, θs, n_permutations, true)






In [17]:
# Simulations for Dirichlet process



In [18]:
"""
    savefig_discr_discr_all(n_1, n_2, a, b, n, m, s, θs, n_permutations)

Run the `discrrpm` experiment twice — once with identical laws (false positives) and once
with two independently generated laws (true positives) — and save the threshold plots,
both rate plots and the resulting ROC curve as PNG files under `newplots/discr_discr` in
the current working directory. The directory is created if it does not exist.

# Arguments
- `n_1`: number of atoms of Q^1 and Q^2; the atoms are discrete probability measures.
- `n_2`: number of atoms of each inner discrete probability measure (the same for all
  atoms of Q^1 and Q^2).
- `a`, `b`: the inner probability measures live on `[a, b]`.
- `n`, `m`: number of rows / columns in a hierarchical sample.
- `s`: number of distances sampled.
- `θs`: probability levels for thresholds.
- `n_permutations`: number of permutations for the permutation approach.

Returns the value of the last `savefig` call.
"""
function savefig_discr_discr_all(n_1::Int, n_2::Int, a::Float64, b::Float64, n::Int, m::Int, s::Int, θs::Vector{Float64}, n_permutations::Int)
    println("Simulating discr discr with n = $n, m = $m")

    # Create the output directory before the (long) simulations so that a missing
    # directory cannot make `savefig` fail after all the work is done.
    filepath = joinpath(pwd(), "newplots/discr_discr")
    mkpath(filepath)

    q_discr_1 = discrrpm(n_1,n_2,a,b)
    q_discr_2 = discrrpm(n_1,n_2,a,b)

    # Same law on both sides: rejection rates are false positive rates.
    _, _, _, fp_rates, emp_vs_perm_plot, fp_plot = simulate_perm(q_discr_1, q_discr_1, n, m, s, θs, n_permutations, true)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_discr_discr_n1=$(n_1)_n2=$(n_2)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))
    savefig(fp_plot, joinpath(filepath, "falsepositive_rates_discr_discr_n1=$(n_1)_n2=$(n_2)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))

    # Different laws: rejection rates are true positive rates.
    _, _, _, tp_rates, emp_vs_perm_plot, tp_plot = simulate_perm(q_discr_1, q_discr_2, n, m, s, θs, n_permutations, false)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_discr_discr_n1=$(n_1)_n2=$(n_2)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))
    savefig(tp_plot, joinpath(filepath, "truepositive_rates_discr_discr_n1=$(n_1)_n2=$(n_2)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))

    # ROC curve: true positive rate against false positive rate, one point per level in θs.
    roc_plot = plot(title = "ROC Curve", ratio = 1.0, xlims = (0,1), ylims = (0,1), xlabel = "False Positive Rate", ylabel = "True Positive Rate")
    roc_plot = plot!(roc_plot, [0, 1], [0, 1], linestyle=:dash, color=:gray,label=false)
    roc_plot = plot!(roc_plot, fp_rates[:, 1], tp_rates[:, 1], label = "WoW", color = "green")
    roc_plot = plot!(roc_plot, fp_rates[:, 2], tp_rates[:, 2], label = "HIPM", color = "blue")
    return savefig(roc_plot, joinpath(filepath, "ROC_discr_discr_n1=$(n_1)_n2=$(n_2)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations).png"))
end

savefig_discr_discr_all (generic function with 1 method)

In [19]:
"""
    savefig_discr_normal_all(n_1, a, b, n, m, s, θs, n_permutations)

Run the `discr_normal` experiment twice — once with identical laws (false positives) and
once with a perturbed second law (true positives) — and save the threshold plots, both
rate plots and the resulting ROC curve as PNG files under `newplots/discr_normal` in the
current working directory. The directory is created if it does not exist.

# Arguments
- `n_1`: number of atoms of Q^1 and Q^2 (the atoms are normal distributions).
- `a`, `b`: forwarded to `discr_normal`.
- `n`, `m`: number of rows / columns in a hierarchical sample.
- `s`: number of distances sampled.
- `θs`: probability levels for thresholds.
- `n_permutations`: number of permutations for the permutation approach.

Returns the value of the last `savefig` call.
"""
function savefig_discr_normal_all(n_1::Int, a::Float64, b::Float64, n::Int, m::Int, s::Int, θs::Vector{Float64}, n_permutations::Int)
    println("Simulating discr normal with n = $n, m = $m")

    # Create the output directory before the (long) simulations so that a missing
    # directory cannot make `savefig` fail after all the work is done.
    filepath = joinpath(pwd(), "newplots/discr_normal")
    mkpath(filepath)

    # Second law: means and standard deviations of the first one, shifted by small
    # random amounts.
    mu_1 = (rand(n_1) .+ 5).*2
    mu_2 = mu_1 .+ rand(n_1) / 5
    sigma_1 = rand(n_1) .+ 1
    sigma_2 = sigma_1 .+ rand(n_1) / 5

    q_discr_normal_1 = discr_normal(n_1, mu_1, sigma_1, a, b)
    q_discr_normal_2 = discr_normal(n_1, mu_2, sigma_2, a, b)

    # Same law on both sides: rejection rates are false positive rates.
    _, _, _, fp_rates, emp_vs_perm_plot, fp_plot = simulate_perm(q_discr_normal_1, q_discr_normal_1, n, m, s, θs, n_permutations, true)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_discr_normal_n1=$(n_1)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))
    savefig(fp_plot, joinpath(filepath, "falsepositive_rates_discr_normal_n1=$(n_1)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))

    # Different laws: rejection rates are true positive rates.
    _, _, _, tp_rates, emp_vs_perm_plot, tp_plot = simulate_perm(q_discr_normal_1, q_discr_normal_2, n, m, s, θs, n_permutations, false)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_discr_normal_n1=$(n_1)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))
    savefig(tp_plot, joinpath(filepath, "truepositive_rates_discr_normal_n1=$(n_1)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))

    # ROC curve: true positive rate against false positive rate, one point per level in θs.
    roc_plot = plot(title = "ROC Curve", ratio = 1.0, xlims = (0,1), ylims = (0,1), xlabel = "False Positive Rate", ylabel = "True Positive Rate")
    roc_plot = plot!(roc_plot, [0, 1], [0, 1], linestyle=:dash, color=:gray,label=false)
    roc_plot = plot!(roc_plot, fp_rates[:, 1], tp_rates[:, 1], label = "WoW",color = "green")
    roc_plot = plot!(roc_plot, fp_rates[:, 2], tp_rates[:, 2], label = "HIPM",color = "blue")
    # This function has no `n_2` parameter, so the file name carries only `n1`, like the
    # other files written here.
    return savefig(roc_plot, joinpath(filepath, "ROC_discr_normal_n1=$(n_1)_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations).png"))
end

savefig_discr_normal_all (generic function with 1 method)

In [20]:
"""
    savefig_dirichlet_all(α_1, α_2, P_0_1, P_0_2, a, b, n, m, s, θs, n_permutations)

Run the experiment for two laws built as `DP(α_1, P_0_1, a, b)` and `DP(α_2, P_0_2, a, b)`
— once with the first law on both sides (false positives) and once with the two laws
(true positives) — and save the threshold plots, both rate plots and the resulting ROC
curve as PNG files under `newplots/dirichlet` in the current working directory. The
directory is created if it does not exist.

# Arguments
- `α_1`, `α_2`: first argument passed to `DP` for each law.
- `P_0_1`, `P_0_2`: functions passed to `DP`; each should be able to generate atoms.
- `a`, `b`: forwarded to `DP`.
- `n`, `m`: number of rows / columns in a hierarchical sample.
- `s`: number of distances sampled.
- `θs`: probability levels for thresholds.
- `n_permutations`: number of permutations for the permutation approach.

Returns the value of the last `savefig` call.
"""
function savefig_dirichlet_all(α_1::Float64, α_2::Float64, P_0_1::Function, P_0_2::Function, a::Float64, b::Float64, n::Int, m::Int, s::Int, θs::Vector{Float64}, n_permutations::Int)
    println("Generating Dirichlet processes, n = $n, m = $m")

    # Create the output directory before the (long) simulations so that a missing
    # directory cannot make `savefig` fail after all the work is done.
    filepath = joinpath(pwd(), "newplots/dirichlet")
    mkpath(filepath)

    q_dp_1 = DP(α_1, P_0_1, a, b)
    q_dp_2 = DP(α_2, P_0_2, a, b)

    # Same law on both sides: rejection rates are false positive rates.
    _, _, _, fp_rates, emp_vs_perm_plot, fp_plot = simulate_perm(q_dp_1, q_dp_1, n, m, s, θs, n_permutations, true)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_dirichlet_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))
    savefig(fp_plot, joinpath(filepath, "falsepositive_rates_dirichlet_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))

    # Different laws: rejection rates are true positive rates.
    _, _, _, tp_rates, emp_vs_perm_plot, tp_plot = simulate_perm(q_dp_1, q_dp_2, n, m, s, θs, n_permutations, false)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_dirichlet_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))
    savefig(tp_plot, joinpath(filepath, "truepositive_rates_dirichlet_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))

    # ROC curve: true positive rate against false positive rate, one point per level in θs.
    roc_plot = plot(title = "ROC Curve", ratio = 1.0, xlims = (0,1), ylims = (0,1), xlabel = "False Positive Rate", ylabel = "True Positive Rate")
    roc_plot = plot!(roc_plot, [0, 1], [0, 1], linestyle=:dash, color=:gray,label=false)
    roc_plot = plot!(roc_plot, fp_rates[:, 1], tp_rates[:, 1], label = "WoW", color = "green")
    roc_plot = plot!(roc_plot, fp_rates[:, 2], tp_rates[:, 2], label = "HIPM", color = "blue")

    return savefig(roc_plot, joinpath(filepath, "ROC_dirichlet_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations).png"))
end

savefig_dirichlet_all (generic function with 1 method)

In [21]:
"""
    savefig_dirichlet_diffalpha_all(α_1, α_2, P_0_1, P_0_2, a, b, n, m, s, θs, n_permutations)

Run the experiment for two laws that share the base measure `P_0_1` and differ only in
their first `DP` argument: `DP(α_1, P_0_1, a, b)` and `DP(α_2, P_0_1, a, b)`. The first
law is compared with itself (false positives) and with the second law (true positives).
Threshold plots, both rate plots and the ROC curve are saved as PNG files under
`newplots/dirichlet` in the current working directory, which is created if missing.

# Arguments
- `α_1`, `α_2`: first argument passed to `DP` for each law.
- `P_0_1`: function passed to `DP` for both laws; it should be able to generate atoms.
- `P_0_2`: accepted so the signature matches `savefig_dirichlet_all`, but not used.
- `a`, `b`: forwarded to `DP`.
- `n`, `m`: number of rows / columns in a hierarchical sample.
- `s`: number of distances sampled.
- `θs`: probability levels for thresholds.
- `n_permutations`: number of permutations for the permutation approach.

Returns the value of the last `savefig` call.
"""
function savefig_dirichlet_diffalpha_all(α_1::Float64, α_2::Float64, P_0_1::Function, P_0_2::Function, a::Float64, b::Float64, n::Int, m::Int, s::Int, θs::Vector{Float64}, n_permutations::Int)
    println("Generating Dirichlet processes, n = $n, m = $m")

    # Create the output directory before the (long) simulations so that a missing
    # directory cannot make `savefig` fail after all the work is done.
    filepath = joinpath(pwd(), "newplots/dirichlet")
    mkpath(filepath)

    # Both laws use P_0_1; only the α argument differs.
    q_dp_1 = DP(α_1, P_0_1, a, b)
    q_dp_2 = DP(α_2, P_0_1, a, b)

    # Same law on both sides: rejection rates are false positive rates.
    _, _, _, fp_rates, emp_vs_perm_plot, fp_plot = simulate_perm(q_dp_1, q_dp_1, n, m, s, θs, n_permutations, true)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_dirichlet_diffalpha_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))
    savefig(fp_plot, joinpath(filepath, "falsepositive_rates_dirichlet_diffalpha_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))

    # Different laws: rejection rates are true positive rates.
    _, _, _, tp_rates, emp_vs_perm_plot, tp_plot = simulate_perm(q_dp_1, q_dp_2, n, m, s, θs, n_permutations, false)

    emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
    savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_dirichlet_diffalpha_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))
    savefig(tp_plot, joinpath(filepath, "truepositive_rates_dirichlet_diffalpha_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))

    # ROC curve: true positive rate against false positive rate, one point per level in θs.
    roc_plot = plot(title = "ROC Curve", ratio = 1.0, xlims = (0,1), ylims = (0,1), xlabel = "False Positive Rate", ylabel = "True Positive Rate")
    roc_plot = plot!(roc_plot, [0, 1], [0, 1], linestyle=:dash, color=:gray,label=false)
    roc_plot = plot!(roc_plot, fp_rates[:, 1], tp_rates[:, 1], label = "WoW", color = "green")
    roc_plot = plot!(roc_plot, fp_rates[:, 2], tp_rates[:, 2], label = "HIPM", color = "blue")

    return savefig(roc_plot, joinpath(filepath, "ROC_dirichlet_diffalpha_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations).png"))
end

savefig_dirichlet_diffalpha_all (generic function with 1 method)

In [22]:
# Parameters for the discr_discr simulations
Random.seed!(123456)  # fixed seed so the runs are reproducible

# Number of outer atoms and of atoms of each inner measure
n_1, n_2 = 15, 15

# Arguments `a` and `b` passed on to the simulation functions
a = 0.0
b = 1.0

# Number of sampled distances
s = 200

# Probability levels 0, 0.01, ..., 1 and number of permutations per sample
θs = collect(range(0.0, 1.0; step = 0.01))
n_permutations = 80






#savefig_discr_discr_all(n_1, n_2, a, b, 90, 2, s, θs, n_permutations,)
#savefig_discr_discr_all(n_1, n_2, a, b, 90, 30, s, θs, n_permutations,)





80

In [23]:
# Parameters for the discr_normal simulations
Random.seed!(123456)  # fixed seed so the runs are reproducible

# Number of atoms of each law
n_1 = 15

# Arguments `a` and `b` passed on to the simulation functions
a = 0.0
b = 1.0

# Number of sampled distances
s = 200

# Probability levels 0, 0.01, ..., 1 and number of permutations per sample
θs = collect(range(0.0, 1.0; step = 0.01))
n_permutations = 80



#savefig_discr_normal_all(n_1, a, b, 90, 2, s, θs, n_permutations,)
#savefig_discr_normal_all(n_1, a, b, 90, 30, s, θs, n_permutations,)



80

In [24]:

"""
    probability(baseMeasure::String)

Draw one observation from the base measure selected by `baseMeasure`:

- `"same"`: uniform on [-1/2, 1/2).
- `"splitting"`: with equal probability a point of [-1, -0.75) or a point of [0.75, 1).

Throws an `ArgumentError` for any other value, so an unrecognised name cannot silently
produce `nothing` as a sample.
"""
function probability(baseMeasure::String)
    if baseMeasure == "same" # Uniform(-1/2,1/2)
        return rand() - 0.5
    elseif baseMeasure == "splitting" # sample either close to -1 or close to 1
        atom = rand()
        mixture = rand((0,1)) # 1 selects the component near -1, 0 the component near 1
        return mixture * ( -1. + 0.25 * atom ) + (1 - mixture) * (0.75 + 0.25 * atom)
    end

    throw(ArgumentError("unknown baseMeasure $(repr(baseMeasure)); expected \"same\" or \"splitting\""))
end

# Parameters for the Dirichlet-process simulations
Random.seed!(123456)  # fixed seed so the runs are reproducible

# Both processes use the same α; they differ only in the base measure
α_1 = 1.0
α_2 = 1.0
P_0_1 = () -> probability("same")
P_0_2 = () -> probability("splitting")

# Arguments `a` and `b` passed on to the simulation functions
a = -1.0
b = 1.0

# Number of sampled distances
s = 200

# Probability levels 0, 0.01, ..., 1 and number of permutations per sample
θs = collect(range(0.0, 1.0; step = 0.01))
n_permutations = 80


#savefig_dirichlet_all(α_1, α_2, P_0_1, P_0_2, a, b, 15, 2, s, θs, n_permutations)
#savefig_dirichlet_all(α_1, α_2, P_0_1, P_0_2, a, b, 25, 2, s, θs, n_permutations)


# the code below is for testing

#n = 100
#m = 2
#n_permutations = 20
#s = 25



#q_dp_1 = DP(α_1, P_0_1, a, b)
#q_dp_2 = DP(1.5, P_0_1, a, b)

#d_wws, d_lips, perm_thresholds, fp_rates, emp_vs_perm_plot, fp_plot = simulate_perm(q_dp_1, q_dp_1, n, m, s, θs, n_permutations, true)

#filepath = joinpath(pwd(), "newplots/dirichlet")
#emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
#savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_dirichlet_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))
#savefig(fp_plot, joinpath(filepath, "falsepositive_rates_dirichlet_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_samemeasures.png"))

#d_wws, d_lips, perm_thresholds, tp_rates, emp_vs_perm_plot, tp_plot = simulate_perm(q_dp_1, q_dp_2, n, m, s, θs, n_permutations, false)

#filepath = joinpath(pwd(), "newplots/dirichlet")
#emp_perm = plot(emp_vs_perm_plot[1], emp_vs_perm_plot[2],layout = (2,1))
#savefig(emp_perm, joinpath(filepath, "empirical_vs_permutation_dirichlet_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))
#savefig(tp_plot, joinpath(filepath, "truepositive_rates_dirichlet_n=$(n)_m=$(m)_s=$(s)_permutations=$(n_permutations)_diffmeasures.png"))

#roc_plot = plot(title = "ROC Curve", ratio = 1.0, xlims = (0,1), ylims = (0,1), xlabel = "False Positive Rate", ylabel = "True Positive Rate")
#roc_plot = plot!(roc_plot, [0, 1], [0, 1], linestyle=:dash, color=:gray,label=false)
#roc_plot = plot!(roc_plot, fp_rates[:, 1], tp_rates[:, 1], label = "WoW", color = "green")
#roc_plot = plot!(roc_plot, fp_rates[:, 2], tp_rates[:, 2], label = "HIPM", color = "blue")




80

In [25]:
#fp_plot

In [26]:
#tp_plot

In [27]:
#roc_plot

In [28]:

"""
    probability(baseMeasure::String)

Draw one observation from the base measure selected by `baseMeasure`:

- `"same"`: uniform on [-1/2, 1/2).
- `"splitting"`: with equal probability a point of [-1, -0.75) or a point of [0.75, 1).

Throws an `ArgumentError` for any other value, so an unrecognised name cannot silently
produce `nothing` as a sample.
"""
function probability(baseMeasure::String)
    if baseMeasure == "same" # Uniform(-1/2,1/2)
        return rand() - 0.5
    elseif baseMeasure == "splitting" # sample either close to -1 or close to 1
        atom = rand()
        mixture = rand((0,1)) # 1 selects the component near -1, 0 the component near 1
        return mixture * ( -1. + 0.25 * atom ) + (1 - mixture) * (0.75 + 0.25 * atom)
    end

    throw(ArgumentError("unknown baseMeasure $(repr(baseMeasure)); expected \"same\" or \"splitting\""))
end

# Dirichlet-process simulations with different α values
Random.seed!(123456)  # fixed seed so the runs are reproducible

α_1, α_2 = 1.0, 1.5
P_0_1 = () -> probability("same")
P_0_2 = () -> probability("splitting")

# Arguments `a` and `b` passed on to the simulation function
a = -1.0
b = 1.0

# Number of sampled distances
s = 200

# Probability levels 0, 0.01, ..., 1 and number of permutations per sample
θs = collect(range(0.0, 1.0; step = 0.01))
n_permutations = 80

# The "splitting" base measure is passed in both base-measure slots; the runs differ
# only in the sample sizes (n, m) = (50, 2), (80, 2), (80, 7), (80, 15).
savefig_dirichlet_diffalpha_all(α_1, α_2, P_0_2, P_0_2, a, b, 50, 2, s, θs, n_permutations)
savefig_dirichlet_diffalpha_all(α_1, α_2, P_0_2, P_0_2, a, b, 80, 2, s, θs, n_permutations)
savefig_dirichlet_diffalpha_all(α_1, α_2, P_0_2, P_0_2, a, b, 80, 7, s, θs, n_permutations)
savefig_dirichlet_diffalpha_all(α_1, α_2, P_0_2, P_0_2, a, b, 80, 15, s, θs, n_permutations)


Generating Dirichlet processes, n = 50, m = 2
s = 1
s = 2
s = 3
s = 4
s = 5
s = 6
s = 7
s = 8
s = 9
s = 10
s = 11
s = 12
s = 13
s = 14
s = 15
s = 16
s = 17
s = 18
s = 19
s = 20
s = 21
s = 22
s = 23
s = 24
s = 25
s = 26
s = 27
s = 28
s = 29
s = 30
s = 31
s = 32
s = 33
s = 34
s = 35
s = 36
s = 37
s = 38
s = 39
s = 40
s = 41
s = 42
s = 43
s = 44
s = 45
s = 46
s = 47
s = 48
s = 49
s = 50
s = 51
s = 52
s = 53
s = 54
s = 55
s = 56
s = 57
s = 58
s = 59
s = 60
s = 61
s = 62
s = 63
s = 64
s = 65
s = 66
s = 67
s = 68
s = 69
s = 70
s = 71
s = 72
s = 73
s = 74
s = 75
s = 76
s = 77
s = 78
s = 79
s = 80
s = 81
s = 82
s = 83
s = 84
s = 85
s = 86
s = 87
s = 88
s = 89
s = 90
s = 91
s = 92
s = 93
s = 94
s = 95
s = 96
s = 97
s = 98
s = 99
s = 100
s = 101
s = 102
s = 103
s = 104
s = 105
s = 106
s = 107
s = 108
s = 109
s = 110
s = 111
s = 112
s = 113
s = 114
s = 115
s = 116
s = 117
s = 118
s = 119
s = 120
s = 121
s = 122
s = 123
s = 124
s = 125
s = 126
s = 127
s = 128
s = 129
s = 130
s = 131
s = 132
s = 13

"c:\\Users\\User\\Desktop\\Two-samples-test\\newplots\\dirichlet\\ROC_dirichlet_diffalpha_n=80_m=15_s=200_permutations=80.png"