## Multivariate Empirical CDF Algorithm

This algorithm uses a divide-and-conquer (merge-sort style) strategy to compute the
multivariate empirical CDF at the observed values, with $O(n\log n)$ time complexity.

Algorithm based on 
Lee, D., & Joe, H. (2018). Efficient computation of multivariate empirical distribution functions at the observed values. Computational Statistics, 33(3), 1413-1428.

In [268]:
using LinearAlgebra
using Random
using CairoMakie
using Distances
CairoMakie.activate!(type = "svg")
using Debugger
using StatsBase
using BenchmarkTools

In [486]:
# Small hand-checkable example. These three values are replaced right away by the
# random sample below.
x = collect(1:8)
y = 10 * [8, 4, 3, 6, 5, 1, 2, 7]
p = ones(8)
# Alternative weights for the small example: p = [1,1,1,1,0,0,1,1]

# Random sample of N points, reordered so that x is increasing, with random
# weights rescaled to unit 1-norm.
N = 100
x = rand(N)
y = rand(N)
v = sortperm(x)
x, y = x[v], y[v]
p = normalize!(rand(N), 1)

# Cross-check the divide-and-conquer routine against the brute-force reference.
isapprox(bruteecdf(x, y, p), bivariatedominance(x, y, p))

true

In [487]:
"""
    bruteecdf(x, y, p=fill(1/length(y), length(y)))

Reference implementation of the weighted bivariate empirical CDF evaluated at the
observed points.

Entry `i` of the result is the sum of the weights `p[j]` over all points `j` with
`x[j] ≤ x[i]` and `y[j] ≤ y[i]` (point `i` itself included). Every point is compared
against every other point, so the cost grows quadratically with `length(y)`.

# Arguments
- `x`, `y`: coordinates of the observations.
- `p`: weight of each observation; defaults to equal weights `1/length(y)`.

# Returns
A `Vector{Float64}` of length `length(y)`.
"""
function bruteecdf(x, y, p=fill(1/length(y), length(y)))
    z = zeros(length(y))
    for i in 1:length(y)
        # Mask of the points that are ≤ point i in both coordinates.
        dominated = (x .≤ x[i]) .& (y .≤ y[i])
        z[i] = dot(dominated, p)
    end
    return z
end

bruteecdf (generic function with 2 methods)

In [488]:
# Merge two runs of x-ranks, each sorted increasingly, into one sorted run.
# Every element of `right` has `acc` increased by the total weight of the elements
# of `left` that are ≤ it. `acc` and `w` are both indexed by x-rank.
function _dominance_merge!(acc, w, left, right)
    nl, nr = length(left), length(right)
    merged = Vector{Int}(undef, nl + nr)
    i, j, k = 1, 1, 1
    mass = zero(eltype(w))      # weight of the `left` elements emitted so far
    while i ≤ nl && j ≤ nr
        if left[i] ≤ right[j]
            merged[k] = left[i]
            mass += w[left[i]]
            i += 1
        else
            merged[k] = right[j]
            acc[right[j]] += mass
            j += 1
        end
        k += 1
    end
    # Leftover `left` elements have nothing in `right` after them to contribute to.
    while i ≤ nl
        merged[k] = left[i]
        i += 1
        k += 1
    end
    # Leftover `right` elements are ≥ every element of `left`.
    while j ≤ nr
        merged[k] = right[j]
        acc[right[j]] += mass
        j += 1
        k += 1
    end
    return merged
end

# Merge sort of `seq` (a vector of x-ranks) that accumulates dominance weights into
# `acc` while merging. Returns the sorted run.
function _dominance_sort!(acc, w, seq)
    n = length(seq)
    # `≤ 1` (rather than `== 1`) so that empty input terminates as well.
    n ≤ 1 && return seq
    mid = n ÷ 2
    left = _dominance_sort!(acc, w, seq[1:mid])
    right = _dominance_sort!(acc, w, seq[mid+1:n])
    return _dominance_merge!(acc, w, left, right)
end

"""
    bivariatedominance(x, y, p=fill(1/length(y), length(y)))

Weighted bivariate empirical CDF at the observed points, computed with a
merge-sort style divide and conquer instead of comparing all pairs.

Entry `i` of the result is the sum of `p[j]` over all `j` with `x[j] ≤ x[i]` and
`y[j] ≤ y[i]` (point `i` itself included). The result is returned in the same order
as the inputs; `x` does not have to be sorted, and tied or duplicated points are
counted according to the `≤` definition above.

# Arguments
- `x`, `y`: coordinates of the observations.
- `p`: weight of each observation; defaults to equal weights `1/length(y)`.

# Returns
A vector of length `length(y)`.

# Throws
- `DimensionMismatch` if `x`, `y` and `p` do not all have the same length.
"""
function bivariatedominance(x, y, p=fill(1/length(y), length(y)))
    n = length(y)
    if length(x) != n || length(p) != n
        throw(DimensionMismatch(
            "x, y and p must have the same length; got $(length(x)), $n and $(length(p))"))
    end

    # Lexicographic (x, then y) order via two stable sorts: sort by y first, then
    # stably by x. `order[k]` is the original index of the point with x-rank k.
    byy = sortperm(y)
    order = byy[sortperm(x[byy])]
    xs, ys, w = x[order], y[order], p[order]

    # x-ranks listed by increasing y. `sortperm` is stable, so points with equal y
    # stay in increasing x-rank order.
    seq = sortperm(ys)

    acc = zeros(n)
    _dominance_sort!(acc, w, seq)
    total = acc .+ w            # every point also counts its own weight

    # Exact duplicates are adjacent in lexicographic order and only the last copy
    # has seen all the others, so its total is propagated backwards.
    for k in n-1:-1:1
        if xs[k] == xs[k+1] && ys[k] == ys[k+1]
            total[k] = total[k+1]
        end
    end

    # Undo the sort so that the output lines up with the caller's ordering.
    out = similar(total)
    out[order] = total
    return out
end

bivariatedominance (generic function with 2 methods)

In [263]:
# Global accumulator updated in place by `Merge` below, which indexes it with the
# values being merged; it has one slot per element of the global `y`.
z = zeros(length(y))

"""
    Merge(y1, y2)

Tracing version of the merge step: merge the sorted integer runs `y1` and `y2` into
one sorted vector, printing the intermediate state with `@show`.

While merging, every element `e` of `y2` has the global accumulator `z[e]` increased
by the number of elements of `y1` that are ≤ `e`. The values being merged are
therefore used as indices into the global `z`.

Returns the merged vector.
"""
function Merge(y1,y2)
    @show y1,y2
    N1 = length(y1)
    N2 = length(y2)
    N  = N1 + N2
    i,j,k,b = 1,1,1,0
    y = zeros(Int, N)
    while i ≤ N1 && j ≤ N2
        if y1[i] ≤ y2[j]
            y[k]  = y1[i]
            b = b+1
            i = i+1
        else
            y[k] = y2[j]
            z[y2[j]] = z[y2[j]] + b
            j = j+1
        end
        k = k+1
    end
    @show k
    @show y, i, N1
    # Copy what is left of y1, starting at its own cursor `i`. `k - N1` only equals
    # `i` when both runs have the same length.
    if i ≤ N1
        @show y[k:end] = y1[i:end]
    end
    @show y, j
    # Copy what is left of y2, starting at its own cursor `j`. These elements are ≥
    # every element of y1, so each one receives the full count `b`.
    if j ≤ N2
        y[k:end] = y2[j:end]
        @show z[y[k:end]] = z[y[k:end]] .+ b
    end
    @show z
    @show y
    return y
end

"""
    Sort(y)

Recursive merge sort that splits `y` in halves, sorts each half and combines them
with `Merge`. Returns the sorted vector; inputs of length 0 or 1 are returned as is.
"""
function Sort(y)
    N = length(y)
    # `≤ 1` rather than `== 1`: an empty vector would otherwise split into two empty
    # halves forever and overflow the stack.
    if N ≤ 1
        return y
    end
    m = N ÷ 2
    y1 = Sort(y[1:m])
    y2 = Sort(y[m+1:N])
    return Merge(y1, y2)
end

Sort (generic function with 1 method)

In [166]:
# floor(Int,length(y)/2)
# Run the tracing merge sort on the global `y`; the `@show` calls inside `Merge`
# print every merge step.
Sort(y)

(y1, y2) = ([8], [4])
k = 2
(y, i, N1) = ([4, 0], 1, 1)
y[k:end] = y1[k - N1:end] = [8]
(y, j) = ([4, 8], 2)
z = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
y = [4, 8]
(y1, y2) = ([3], [6])
k = 2
(y, i, N1) = ([3, 0], 2, 1)
(y, j) = ([3, 0], 1)
z[y[k:end]] = z[y[k:end]] .+ b = [1.0]
z = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]
y = [3, 6]
(y1, y2) = ([4, 8], [3, 6])
k = 4
(y, i, N1) = ([3, 4, 6, 0], 2, 2)
y[k:end] = y1[k - N1:end] = [8]
(y, j) = ([3, 4, 6, 8], 3)
z = [0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0]
y = [3, 4, 6, 8]
(y1, y2) = ([5], [1])
k = 2
(y, i, N1) = ([1, 0], 1, 1)
y[k:end] = y1[k - N1:end] = [5]
(y, j) = ([1, 5], 2)
z = [0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0]
y = [1, 5]
(y1, y2) = ([2], [7])
k = 2
(y, i, N1) = ([2, 0], 2, 1)
(y, j) = ([2, 0], 1)
z[y[k:end]] = z[y[k:end]] .+ b = [1.0]
z = [0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0]
y = [2, 7]
(y1, y2) = ([1, 5], [2, 7])
k = 4
(y, i, N1) = ([1, 2, 5, 0], 3, 2)
(y, j) = ([1, 2, 5, 0], 2)
z[y[k:end]] = z[y[k:end]] .+ b = [3.0]
z =

8-element Vector{Int64}:
 1
 2
 3
 4
 5
 6
 7
 8