# Compare the computation times of the HIPM and WoW distances

In [None]:
using Plots


include("distributions.jl")

include("distances/new_distance.jl")
include("distances/distance_Wasserstein.jl")
using DataFrames
using CSV



In [None]:
# Set up two Dirichlet processes that share a single base distribution and
# differ only in their concentration parameter.

# Concentration parameters of the two processes.
α_1 = 1.0
α_2 = 2.0

# Zero-argument closures wrapping `probability(...)`.
# Only `P_0_2` is passed to the processes below; `P_0_1` is defined but not used here.
P_0_1 = () -> probability("same")
P_0_2 = () -> probability("splitting")

# presumably the lower/upper bounds handed to `DP` — TODO confirm against its definition
a = -1.0
b = 1.0

# Same base distribution and bounds, different concentration.
q_1 = DP(α_1, P_0_2, a, b)
q_2 = DP(α_2, P_0_2, a, b)


In [None]:
# Grid of sample sizes: number of top-level and bottom-level samples.
# NOTE(review): `10:100:100` has step 100 and stop 100, so each grid currently
# contains only the value 10 — widen the ranges if a larger grid is intended.
n_tops, n_bottoms = collect(10:100:100), collect(10:100:100)

# Number of repetitions per (n_top, n_bottom) pair; times are averaged over S runs.
S = 5

# Average wall-clock time in seconds (as returned by `@elapsed`);
# rows follow `n_tops`, columns follow `n_bottoms`.
times_hipm = zeros(length(n_tops), length(n_bottoms)) # HIPM (`dlip`)
times_wow = zeros(length(n_tops), length(n_bottoms))  # WoW (`ww`)

# Warm-up: call both distances once, untimed, so that JIT compilation of `dlip`
# and `ww` is not counted in the first measured run.
if !isempty(n_tops) && !isempty(n_bottoms)
    let warm_1 = generate_emp(q_1, first(n_tops), first(n_bottoms)),
        warm_2 = generate_emp(q_2, first(n_tops), first(n_bottoms))

        dlip(warm_1, warm_2)
        ww(warm_1, warm_2)
    end
end

for (i, n_top) in enumerate(n_tops)
    for (j, n_bottom) in enumerate(n_bottoms)
        println("n_top = $(n_top), n_bottom = $(n_bottom)")
        for _ in 1:S
            # Fresh samples for every repetition; both distances are timed on the same pair.
            hier_sample_1 = generate_emp(q_1, n_top, n_bottom)
            hier_sample_2 = generate_emp(q_2, n_top, n_bottom)

            t_hipm = @elapsed dlip(hier_sample_1, hier_sample_2) # time HIPM
            t_wow = @elapsed ww(hier_sample_1, hier_sample_2)    # time WoW

            times_hipm[i, j] += t_hipm
            times_wow[i, j] += t_wow
        end
        # Turn the accumulated totals into averages over the S repetitions.
        times_hipm[i, j] /= S
        times_wow[i, j] /= S
    end
end

In [None]:
# Show the matrix of average HIPM times (rows: n_tops, columns: n_bottoms).
times_hipm

In [None]:
# Show the matrix of average WoW times (rows: n_tops, columns: n_bottoms).
times_wow

In [None]:
# Keep three decimals so the timing tables are easier to read.
times_hipm = round.(times_hipm; digits=3)
times_wow = round.(times_wow; digits=3)

# One column per bottom-level sample size, labelled "n_bottom_<value>".
bottom_labels = [Symbol("n_bottom_", n_bottom) for n_bottom in n_bottoms]

# HIPM table: timing columns plus the matching n_tops values as the first column.
df_hipm = DataFrame(times_hipm, bottom_labels)
df_hipm[!, :n_tops] = n_tops
select!(df_hipm, :n_tops, :)

# WoW table, built the same way.
df_wow = DataFrame(times_wow, bottom_labels)
df_wow[!, :n_tops] = n_tops
select!(df_wow, :n_tops, :)


In [None]:
# Show the HIPM timing table (first column n_tops, then one column per n_bottom).
df_hipm

In [None]:
# Show the WoW timing table (first column n_tops, then one column per n_bottom).
df_wow

In [None]:


# Output directory for the timing tables, relative to the current working directory.
filepath = joinpath(pwd(), "time_wow_hipm/")

# Create the directory (and any missing parents) first; writing the CSV files
# would otherwise fail when it does not exist yet. No-op if it already exists.
mkpath(filepath)

# Write one CSV file per distance.
CSV.write(joinpath(filepath, "times_hipm.csv"), df_hipm)
CSV.write(joinpath(filepath, "times_wow.csv"), df_wow)