In [2]:
import numpy as np

def _mean_pairwise_norm(P, Q):
    """Mean Euclidean norm of (q - p) over every row p of P paired with Q."""
    total = 0.
    for p in P:
        # Broadcasting subtracts p from all of Q at once; the norm is taken
        # along the last axis and the resulting norms are summed.
        gaps = Q - p
        total += np.sum(np.sum(gaps**2, axis=-1)**(1./2.))
    # Normalise by the number of (row of P, row of Q) pairs.
    return total / (P.shape[0] * Q.shape[0])


def EnergyDistance(X, Y):
    """ Computes the energy distance (a statistical distance) between the
    cumulative distribution functions F and G of the independent random vectors
    X and Y.

    Inputs:
        X, Y: each is a s x d array (or array-like, e.g. nested lists), where
        d is the dimension of X and Y (assumed to be the same) and s is the
        number of samples; X and Y may hold different numbers of samples

    Output:
        the energy distance, D, where:
        D^2(F,G) = 2 E||X - Y|| - E||X - X'|| - E||Y - Y'||
        The expectations are estimated by averaging over all sample pairs
        (self-pairs included).

    Raises:
        ZeroDivisionError: if X or Y holds no samples.

    (see https://en.wikipedia.org/wiki/Energy_distance)
    """
    # Accept anything array-like; ndarrays pass through unchanged.
    X = np.asarray(X)
    Y = np.asarray(Y)

    A = _mean_pairwise_norm(X, Y)  # E||X - Y||
    B = _mean_pairwise_norm(X, X)  # E||X - X'||
    C = _mean_pairwise_norm(Y, Y)  # E||Y - Y'||

    # D^2 is non-negative in exact arithmetic, but floating-point round-off
    # can leave it a hair below zero when the samples are (nearly) identically
    # distributed; clamp so the square root cannot turn into nan.
    D2 = np.maximum(2. * A - B - C, 0.)
    D = D2 ** (1./2.)

    return D


In [3]:
def Scan_ED(input1, input2):
    """Print both inputs, then the smallest energy distance found by scanning.

    The "final values" of an input are taken as index -1 along axis 1 of
    ``np.stack(input)``, wrapped in one extra leading axis. The final values
    of ``input1`` are compared (via ``EnergyDistance``) with every slice
    obtained by iterating over ``np.asarray(input2).T``, and the final values
    of ``input2`` with every slice of ``np.asarray(input1).T``. The minimum
    over both scans is printed.

    Inputs:
        input1, input2: sequences of equally shaped array-likes that
        ``np.stack`` can combine; the stacked result needs at least two axes.

    Output:
        None. Everything is reported through ``print``: first ``input1`` and
        ``input2`` in full, then a line of the form
        ``Minimum: <index> <value>``.

    Raises:
        ValueError: if either scan has no slices to compare against.
    """
    # Echo the raw inputs (kept for output compatibility).
    print(input1)
    print(input2)

    # Final values of each input, with one extra leading axis.
    x = np.stack(input1)
    y = np.stack(input2)
    fv1 = np.asarray([x[:, -1]])
    fv2 = np.asarray([y[:, -1]])

    # Scan and compute the energy distances. Each list is built exactly once:
    # re-evaluating the same comprehension on every pass of an enclosing loop
    # yields the same list every time and only multiplies the run time.
    cond1 = np.asarray(input2)
    a = [EnergyDistance(fv1, row) for row in cond1.T]

    cond2 = np.asarray(input1)
    b = [EnergyDistance(fv2, row) for row in cond2.T]

    if not a or not b:
        raise ValueError("Scan_ED needs at least one slice to scan in each input")

    # The integer printed after "Minimum:" is the last index of the second
    # scan, kept so the printed line stays the same as before.
    # NOTE(review): it is not the position of the minimum - confirm whether
    # the argmin was intended here.
    i = len(cond2.T) - 1

    print("\nMinimum:", i, np.min((np.min(a), np.min(b))))
    
    


In [4]:
import random

# Length of every generated sample (number of uniform draws per row).
N = 100

# First collection: N1 rows, each holding N draws from [0, 1).
N1 = 100
x = [[random.random() for _ in range(N)] for _ in range(N1)]

# Second collection: N2 rows, generated the same way.
N2 = 100
y = [[random.random() for _ in range(N)] for _ in range(N2)]

# Each collection is wrapped in a one-element list before being paired up.
Data = [[x], [y]]

C1, C2 = Data


In [5]:
# Run the scan on the two randomly generated collections C1 and C2; the call
# prints both inputs in full, followed by the "Minimum:" summary line.
Scan_ED(C1, C2)

[[[0.79617056119638, 0.6465086619687639, 0.11941745756120092, 0.02237964072784837, 0.012297303278786753, 0.869052976258023, 0.06232828759592013, 0.34753934563495104, 0.7765558521791193, 0.7687234693246315, 0.04722994916998913, 0.7278111204041848, 0.8601137596632438, 0.5400637673516957, 0.617123265132955, 0.04452688384011749, 0.3595084340078083, 0.7280767539425388, 0.7710201985095293, 0.28743905613921017, 0.9967501232219618, 0.9635966632917317, 0.7264167235462904, 0.2875431781772919, 0.8470925998092402, 0.9239334071152396, 0.24909859996413442, 0.08090590729274116, 0.7728897751921391, 0.3965604729806883, 0.5722344524208597, 0.8551214354515587, 0.7543483692572176, 0.35288261937964494, 0.9640984716717247, 0.04830517192640016, 0.39792614805825455, 0.3629598225662316, 0.2553241563301347, 0.8190291924692631, 0.8028936472918683, 0.054741694106930194, 0.7545408148099013, 0.829251510933294, 0.9062648210031632, 0.5334609877028873, 0.4806565127775657, 0.40224777452681204, 0.8527378742798197, 0.611


Minimum: 99 2.6177682217907283
