In [1]:
import time

import numpy as np
from numba import jit
from sklearn.metrics.pairwise import pairwise_distances, pairwise_kernels

In [18]:
def compute_kernel(X, sigma):
    """
    X : (n_samples, n_dim)
    sigma : float
    """

    d = X.shape[1]
    denom = np.power(2 * np.pi, d / 2.0) * np.power(sigma, d / 2)
    constant = 1 / denom

    kern = pairwise_kernels(X, metric="rbf", gamma=1 / (sigma * 2)) * constant

    return kern


@jit(nopython=True)
def compute_distance(distx, disty, i, j, k, l):
    withinx = distx[i, j] + distx[k, l] - distx[i, k] - distx[j, l]
    withiny = disty[i, j] + disty[k, l] - disty[i, k] - disty[j, l]

    return withinx * withiny


@jit(nopython=True)
def compute_cov(distx, disty, distz):
    n = X.shape[0]
    cov = np.zeros(n)

    for u in range(n):
        for i in range(n):
            for j in range(n):
                for k in range(n):
                    for l in range(n):
                        dijkl = compute_distance(distx, disty, i, j, k, l)
                        dijlk = compute_distance(distx, disty, i, j, l, k)
                        dilkj = compute_distance(distx, disty, i, l, k, j)
                        d = dijkl + dijlk + dilkj

                        cov[u] += (
                            d * distz[i, u] * distz[j, u] * distz[k, u] * distz[l, u]
                        )

        cov[u] /= np.power(n, 4)

    return cov


# @jit(nopython=True)
def compute_cor(dx, dy, dz):
    cov_xy = compute_cov(
        dx,
        dy,
        dz,
    )
    cov_xx = compute_cov(
        dx,
        dx,
        dz,
    )
    cov_yy = compute_cov(
        dy,
        dy,
        dz,
    )

    return cov_xy / np.sqrt(cov_xx * cov_yy)

In [17]:
X = np.loadtxt("./src/x.csv", skiprows=1, delimiter=",")
Y = np.loadtxt("./src/y.csv", skiprows=1, delimiter=",")
Z = np.loadtxt("./src/z.csv", skiprows=1, delimiter=",")

dx = pairwise_distances(X)
dy = pairwise_distances(Y)
dz = compute_kernel(Z, 0.5)

In [22]:
start = time.time()

cdcov = compute_cov(
    dx,
    dy,
    dz,
)

end = time.time()

print(end - start)

0.43752312660217285


In [27]:
start = time.time()

cdcor = compute_cor(
    dx,
    dy,
    dz,
)

end = time.time()

print(end - start)

0.09127974510192871


In [4]:
def example1(n):
    mean = np.array([0, 0, 0])
    cov = np.array([[1, 0.36, 0.6], [0.36, 1, 0.6], [0.6, 0.6, 1]])

    X = np.random.multivariate_normal(mean, cov, size=n)
    Y = np.random.multivariate_normal(mean, cov, size=n)
    Z = np.random.multivariate_normal(mean, cov, size=n)

    return X, Y, Z

In [14]:
X, Y, Z = example1(100)

dx = pairwise_distances(X)
dy = pairwise_distances(Y)
dz = compute_kernel(Z, 0.5)

In [19]:
start = time.time()

cdcor = compute_cov(
    dx,
    dy,
    dz,
)

end = time.time()

print(end - start)

93.0084080696106


In [16]:
np.savetxt("./data/x.csv", X, delimiter=",", comments="", header="a,b,c")
np.savetxt("./data/y.csv", Y, delimiter=",", comments="", header="a,b,c")
np.savetxt("./data/z.csv", Z, delimiter=",", comments="", header="a,b,c")

In [17]:
cdcor.shape

(50,)