In [138]:
import numpy as np
from numba import jit
from scipy.spatial.distance import pdist, squareform
import time
import warnings
warnings.filterwarnings('ignore')

In [139]:
# Example 1 inputs: two samples with four observations of two values each.
X = np.asarray([
    [1, 2],
    [4, 1],
    [3, 2],
    [3, 3],
])
Y = np.asarray([
    [4, 1],
    [3, 2],
    [1, 4],
    [2, 4],
])

In [140]:
# Example 2 inputs: two one-dimensional samples of five observations each.
a = np.arange(1, 6)
b = np.asarray([3, 2, 4, 2, 1])

In [141]:
def distcorr(X, Y):
    """Return the squared sample distance correlation between X and Y.

    Both inputs are converted with ``np.asarray`` and must have identical
    shapes. One-dimensional inputs are treated as ``n`` scalar observations;
    two-dimensional inputs are treated as ``n`` observations, one per row.

    The returned value is ``dcov2_xy / sqrt(dcov2_xx * dcov2_yy)``, where each
    ``dcov2_*`` term is the mean of an element-wise product of doubly centered
    pairwise distance matrices. Those terms are *squared* distance
    (co)variances, so the ratio is the squared distance correlation; take its
    square root if the unsquared statistic is needed.

    Modeled on code of satra at https://github.com/satra/mapalign/blob/master/mapalign/dist.py#L8

    Parameters
    ----------
    X, Y : array_like
        Samples of identical shape, either ``(n,)`` or ``(n, d)``.

    Returns
    -------
    numpy.float64
        The squared sample distance correlation of ``X`` and ``Y``.

    Raises
    ------
    ValueError
        If the shapes differ, if the inputs contain no observations, or if
        all observations of one input are equal (its distance variance is
        zero, so the ratio is undefined).

    Notes
    -----
    This function is deliberately not decorated with ``numba.jit``: it relies
    on ``scipy.spatial.distance.pdist`` and ``squareform``, which Numba cannot
    compile in nopython mode, so the decorator either fails at call time or
    silently falls back to object mode, depending on the Numba version.

    Example 1:
    >>> X = np.array([[1, 2], [4, 1], [3, 2], [3, 3]])
    >>> Y = np.array([[4, 1], [3, 2], [1, 4], [2, 4]])
    >>> round(float(distcorr(X, Y)), 6)
    0.690828

    Example 2:
    >>> a = np.array([1, 2, 3, 4, 5])
    >>> b = np.array([3, 2, 4, 2, 1])
    >>> round(float(distcorr(a, b)), 6)
    0.518305
    """
    # Accept any array-like (lists, tuples, arrays).
    X = np.asarray(X)
    Y = np.asarray(Y)

    # check to see if X and Y are the same length and dimension
    if Y.shape != X.shape:
        raise ValueError('Samples mismatch in lenght or dimension')

    # Reject scalars and empty samples explicitly instead of failing later
    # with an unrelated indexing error.
    if X.ndim == 0 or X.shape[0] == 0:
        raise ValueError("Inputs must contain at least one observation")

    # if necessary convert X and Y from 1D to 2D column arrays
    if X.ndim == 1:
        X = X[:, None]
        Y = Y[:, None]

    # A sample whose observations are all identical has zero distance
    # variance, which would make the denominator below zero.
    if np.all(X == X[0]) or np.all(Y == Y[0]):
        raise ValueError("All elements of one input are equal, cannot divide by zero")

    # Create pairwise distance matrices (n x n)
    a = squareform(pdist(X))
    b = squareform(pdist(Y))

    # doubly center distances: subtract row and column means, add grand mean
    A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
    B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()

    # determine n^2
    n_squared = float(X.shape[0] ** 2)

    # calculate squared distance variances and covariance for XX, YY, XY
    dcov2_xy = (A * B).sum() / n_squared
    dcov2_xx = (A * A).sum() / n_squared
    dcov2_yy = (B * B).sum() / n_squared

    # squared distance correlation of X and Y
    return dcov2_xy / np.sqrt(dcov2_xx * dcov2_yy)

In [142]:
# Example: paired samples, one observation per row (four rows, two columns).
X = np.asarray([[1, 2], [4, 1], [3, 2], [3, 3]])
Y = np.asarray([[4, 1], [3, 2], [1, 4], [2, 4]])

In [143]:
# Evaluate distcorr on the example arrays X and Y; as the final expression of
# the cell, its value is displayed as the cell output.
distcorr(X, Y)

0.6908276670972249