# High-dimensional HSIC

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma
import pickle

Different combinations of $(X, Y)$ to evaluate the approximation methods.

## Examples of one to three-dimensional distributions

In [None]:
# Low-dimensional example for X: 1024 draws from a 3-d Gaussian whose
# covariance matrix has every entry equal to 1.
# Alternative choices kept for experimentation (uncomment one to use it):
#   X = np.random.normal(0, 1, 256).reshape(-1,1)
#   X = np.random.exponential(size=100).reshape(-1,1)
#   X = np.random.chisquare(1, size=20).reshape(-1,1)
#   X = np.random.randn(20*128).reshape(20,-1)
X = np.random.multivariate_normal(mean=[0, 1, 2], cov=np.ones((3, 3)), size=1024)


# Low-dimensional example for Y: 1024 draws from a 3-d Gaussian whose
# covariance matrix has every entry equal to 11.
# Alternative choices kept for experimentation (uncomment one to use it):
#   Y = np.random.normal(0, 1, 64).reshape(-1,1)
#   Y = np.random.exponential(size=256).reshape(-1,1)
#   Y = np.random.randn(20*128).reshape(20,-1)
Y = np.random.multivariate_normal(mean=[10, 11, 12], cov=np.full((3, 3), 11), size=1024)

In [None]:
# show the (n_samples, n_dimensions) shape of X and of Y, one per line
print(X.shape, Y.shape, sep='\n')

## Examples of higher-dimensional distributions

In [None]:
# number of dimensions of the simulated data
dim = 25

# number of samples drawn per dimension
sample_size = 256

# value used as the default for the `s` argument of gen_dependence
i = 1

# how far Y is shifted against X:
#   >= 0 drops the first `shift_par` dimensions of Y,
#   <  0 drops the last `-shift_par` dimensions of Y
shift_par = 0

print('Shifting for: {}'.format(shift_par))

In [None]:
# draw `dim` values uniformly from [-2, 2) and show them as red crosses
time1 = np.random.uniform(-2, 2, dim)
print(time1)
plt.figure(figsize=(16,10))
# the trailing semicolon suppresses the notebook's echo of the returned line objects
plt.plot(time1, 'xr', markersize=16);

In [None]:
# mean independence: a second vector of `dim` uniform values on [-2, 2),
# drawn separately from time1
time2 = np.random.uniform(-2, 2, dim)

print(time2)
plt.figure(figsize=(16,10))
# the trailing semicolon suppresses the notebook's echo of the returned line objects
plt.plot(time2, 'xr', markersize=16);

# Generating high-dimensional distributions

### Generating process for mean dependence

In [None]:
# generating distributions over time for dependent and independent variables
def gen_dependence(time1, time2, test='independence', s=i, sample_size=sample_size):
    """Simulate paired multivariate samples (X, Y) for one dependence scenario.

    One row of ``sample_size`` standard-normal draws is generated per element
    of ``time1``. The elements of ``time1``/``time2`` are only counted; their
    values are never used.

    Parameters
    ----------
    time1 : iterable
        Its length is the number of dimensions of X (and of Y for the
        dependence scenarios).
    time2 : iterable
        Its length is the number of dimensions of Y. Only used when
        ``test='independence'``.
    test : str
        Scenario, with X ~ N(0, 1) and noise ~ N(0, 1) per dimension:
        'independence'       -> Y ~ N(0, 1), drawn separately from X
        'dependence_linear'  -> Y = 7*X + noise
        'dependence_sine'    -> Y = 20*sin(4*pi*X) + noise
        'dependence_cubic'   -> Y = 5*X**3 + noise
        'dependence_quartic' -> Y = 13*X**4 + noise
    s : int
        Accepted for backward compatibility but not used by this function.
        NOTE(review): presumably intended as a random seed - confirm before
        wiring it up, since seeding would change every simulated result.
    sample_size : int
        Number of samples per dimension.

    Returns
    -------
    time1d : ndarray of shape (sample_size, n_dims_X)
    time2d : ndarray of shape (sample_size, n_dims_Y_after_shift)

    Raises
    ------
    ValueError
        If ``test`` is not one of the scenarios listed above.

    Notes
    -----
    The shift is read from the module-level variable ``shift_par`` at call
    time: ``shift_par >= 0`` drops the first ``shift_par`` dimensions of Y,
    a negative value drops the last ``-shift_par`` dimensions.
    """
    # deterministic part of Y as a function of X for each dependence scenario
    links = {
        'dependence_linear': lambda x: 7*x,
        'dependence_sine': lambda x: 20*np.sin(4*np.pi*x),
        'dependence_cubic': lambda x: 5*x**3,
        'dependence_quartic': lambda x: 13*x**4,
    }

    if test != 'independence' and test not in links:
        raise ValueError(
            "unknown test {!r}; expected 'independence' or one of {}".format(
                test, sorted(links)))

    # rows are collected in lists and stacked once, instead of re-allocating
    # the whole array with np.append on every iteration
    rows_x = []
    rows_y = []

    if test == 'independence':
        # all X rows are drawn first, then all Y rows
        for _ in time1:
            rows_x.append(np.random.normal(0, 1, sample_size))
        for _ in time2:
            rows_y.append(np.random.normal(0, 1, sample_size))
    else:
        link = links[test]
        # per dimension: draw X, then the noise added to link(X)
        for _ in time1:
            dist1 = np.random.normal(0, 1, sample_size)
            rows_x.append(dist1)
            rows_y.append(link(dist1) + np.random.normal(0, 1, sample_size))

    # the explicit reshape keeps the (n_dims, sample_size) layout even when a list is empty
    t1d = np.array(rows_x, dtype=float).reshape(len(rows_x), sample_size)
    t2d = np.array(rows_y, dtype=float).reshape(len(rows_y), sample_size)

    # shifting time2 (rows are dimensions at this point)
    if shift_par >= 0:
        t2d = t2d[shift_par:]
    else:
        t2d = t2d[:shift_par]

    # transpose so that rows are samples and columns are dimensions
    return t1d.T, t2d.T

In [None]:
# 12 dimensions for X and for Y; only the number of entries matters to gen_dependence
time1 = np.linspace(0, 11, 12)
time2 = np.linspace(0, 11, 12)
# gen_dependence has no `dist` parameter, so passing dist='beta' raised a TypeError
time1d, time2d = gen_dependence(time1, time2, test='independence', s=1, sample_size=sample_size)

### Plots

In [None]:
# different dimensions: time1d and time2d may differ in their number of columns here
print(time1d.shape)
plt.figure(figsize=(16,10))
# x-range covers the column indices of time1d, y-range is fixed to [-4, 4]
plt.axis([-1, time1d.shape[1], -4, 4])
# plotting the transpose draws one line per row of time1d, across its columns
plt.plot(time1d.T);
plt.show()

In [None]:
# same view for time2d: shape first, then one line per row across its columns
print(time2d.shape)
plt.figure(figsize=(16,10))
# x-range covers the column indices of time2d, y-range is fixed to [-4, 4]
plt.axis([-1, time2d.shape[1], -4, 4])
plt.plot(time2d.T);
plt.show()

In [None]:
# same dimensions: Y is always the generated time2d; X drops as many columns
# of time1d as the shift demands (from the end for a positive shift, from the
# start for a negative one)
Y = time2d
if shift_par > 0:
    X = time1d[:, :-shift_par]
elif shift_par < 0:
    X = time1d[:, -shift_par:]
else:
    X = time1d

In [None]:
# shape of X, then one line per row of X plotted across its columns
print(X.shape)
plt.figure(figsize=(16,10))
# x-range covers the column indices of X, y-range is fixed to [-4, 4]
plt.axis([-1, X.shape[1], -4, 4])
plt.plot(X.T);
plt.show()

In [None]:
# shape of Y, then one line per row of Y plotted across its columns
print(Y.shape)
plt.figure(figsize=(16,10))
# x-range covers the column indices of Y, y-range is fixed to [-4, 4]
plt.axis([-1, Y.shape[1], -4, 4])
plt.plot(Y.T);
plt.show()

Show their dependence:

In [None]:
# scatter the entries of X against the entries of Y
# (assumes X and Y hold the same number of values - TODO confirm for non-zero shifts)
plt.scatter(X, Y);

### Auxiliary functions

In [None]:
# median heuristic for kernel width
def width(Z):
    """Kernel width from the median heuristic, multiplied by the number of columns of Z.

    At most the first 100 rows of Z are used. The squared Euclidean distances
    between all pairs of those rows are computed, the median of the strictly
    positive ones is taken, and ``sqrt(0.5 * median)`` is scaled by
    ``Z.shape[1]``.

    Parameters
    ----------
    Z : ndarray of shape (n_samples, n_dims)

    Returns
    -------
    float
        ``n_dims * sqrt(0.5 * median(positive squared pairwise distances))``.
    """
    head = Z[:100]
    n_rows = head.shape[0]

    # squared norms of each row; broadcasting yields |a|^2 + |b|^2 - 2 a.b
    sq_norms = np.sum(head * head, axis=1)
    sq_dists = sq_norms[:, None] + sq_norms[None, :] - 2 * head @ head.T

    # each unordered pair once (strict upper triangle), keeping positive entries only
    pair_dists = sq_dists[np.triu_indices(n_rows, k=1)]
    positive = pair_dists[pair_dists > 0]

    return Z.shape[1] * np.sqrt(0.5 * np.median(positive))


# rbf dot product
def rbf_dot(X, Y, width):
    """Gaussian (RBF) kernel matrix between the rows of X and the rows of Y.

    Parameters
    ----------
    X : ndarray of shape (n_x, d)
    Y : ndarray of shape (n_y, d)
    width : float
        Kernel width; entries are ``exp(-||x - y||^2 / (2 * width**2))``.

    Returns
    -------
    ndarray of shape (n_x, n_y)
    """
    x_sq = np.sum(X * X, axis=1)
    y_sq = np.sum(Y * Y, axis=1)

    # squared Euclidean distances via |x|^2 + |y|^2 - 2 x.y
    sq_dists = x_sq[:, None] + y_sq[None, :] - 2 * X @ Y.T

    return np.exp(-sq_dists / (2 * width**2))

# HSIC

The distribution of the HSIC test statistic under the null hypothesis $\mathcal{H}_0: P_{XY} = P_X P_Y$ is complex and must be approximated. Here, we approximate it first by randomly permuting the order of $Y$ whilst the order of $X$ is kept fixed, and second by a Gamma distribution with parameters $\alpha$ (`al`) and $\beta$ (`bet`).

## HSIC with permutations

In [None]:
def HSIC_permutations(X, Y, alpha, width_X, width_Y, shuffle):    # set widths to -1 for median heuristics
    """HSIC independence test with a permutation estimate of the null threshold.

    Parameters
    ----------
    X, Y : ndarray
        Samples with one row per observation; both must have the same number
        of rows.
    alpha : float
        Test level; the threshold is the (1 - alpha) order statistic of the
        permuted statistics.
    width_X, width_Y : float
        RBF kernel widths; pass -1 to use the median heuristic ``width``.
    shuffle : int
        Number of permutations.

    Returns
    -------
    stat : float
        ``sum(K_c.T * L) / m**2`` with K centred and L uncentred.
    threshold : float
        Value the statistic is compared against; H0 is rejected when
        ``stat > threshold``.

    Raises
    ------
    ValueError
        If ``shuffle`` is smaller than 1.
    """
    if shuffle < 1:
        raise ValueError('shuffle must be at least 1, got {}'.format(shuffle))

    m = X.shape[0]

    # median heuristics for kernel width
    if width_X == -1:
        width_X = width(X)
    if width_Y == -1:
        width_Y = width(Y)

    # compute Gram matrices
    K = rbf_dot(X, X, width_X)
    L = rbf_dot(Y, Y, width_Y)

    # centering matrix...
    H = np.eye(m) - (1/m) * (np.ones((m, m)))

    # ...to center K
    K_c = H @ K @ H

    # test statistic
    stat = 1/(m**2) * np.sum(np.multiply(K_c.T, L))

    # statistic under each permutation
    HSIC_arr = np.zeros(shuffle)

    # permuting rows and columns of L together corresponds to reordering the
    # observations of Y while X stays fixed
    for sh in range(shuffle):
        index_perm = np.random.permutation(L.shape[0])
        L_perm = L[np.ix_(index_perm, index_perm)]
        HSIC_arr[sh] = 1/(m**2) * np.sum(np.multiply(K_c.T, L_perm))

    HSIC_arr_sort = np.sort(HSIC_arr)

    # computing 1-alpha threshold; the index is clamped to the valid range
    # because round((1-alpha)*shuffle) can reach `shuffle` for small alpha
    # (IndexError) or become negative for alpha > 1 (silent wrap-around)
    threshold_idx = min(max(round((1-alpha)*shuffle), 0), shuffle - 1)
    threshold = HSIC_arr_sort[threshold_idx]

    return stat, threshold

## HSIC with Gamma distribution approximation

In [None]:
def HSIC_gamma(X, Y, alpha, width_X, width_Y):    # set widths to -1 for median heuristics
    """HSIC independence test with a Gamma approximation of the null distribution.

    Parameters
    ----------
    X, Y : ndarray
        Samples with one row per observation; both must have the same number
        of rows ``m``.
    alpha : float
        Test level; the threshold is the (1 - alpha) quantile of the fitted
        Gamma distribution.
    width_X, width_Y : float
        RBF kernel widths; pass -1 to use the median heuristic ``width``.

    Returns
    -------
    stat : float
        ``sum(K_c.T * L) / m`` with K centred and L uncentred.
    threshold : float
        Value the statistic is compared against; H0 is rejected when
        ``stat > threshold``.

    Notes
    -----
    The variance estimate contains the factor ``(m-4)*(m-5)`` and divides by
    ``m*(m-1)*(m-2)*(m-3)``, so the fit is only meaningful for ``m > 5``.
    """
    m = X.shape[0]

    # median heuristics for kernel width
    if width_X == -1:
        width_X = width(X)
    if width_Y == -1:
        width_Y = width(Y)

    # compute Gram matrices
    K = rbf_dot(X, X, width_X)
    L = rbf_dot(Y, Y, width_Y)

    # centering matrix...
    H = np.eye(m) - (1/m) * (np.ones((m, m)))

    # ...to center K and L
    K_c = H @ K @ H
    L_c = H @ L @ H

    # test statistic
    stat = 1/m * np.sum(np.multiply(K_c.T, L))

    # variance under H0, from the off-diagonal entries of (K_c * L_c / 6)**2
    vHSIC = np.power(1/6 * np.multiply(K_c, L_c), 2)
    vaHSIC = 1/(m*(m-1)) * (np.sum(vHSIC) - np.trace(vHSIC))
    varHSIC = 72*(m-4)*(m-5)/(m*(m-1)*(m-2)*(m-3)) * vaHSIC

    # The mean under H0 uses the average kernel value over distinct pairs
    # (i != j), so the diagonals are removed before summing. The previous
    # code built these matrices but then summed the full K and L, which
    # biased mu_X and mu_Y with the diagonal entries.
    K_offdiag = K - np.diag(np.diag(K))
    L_offdiag = L - np.diag(np.diag(L))

    bone = np.ones(m)

    mu_X = 1/(m*(m-1)) * bone @ (K_offdiag @ bone)
    mu_Y = 1/(m*(m-1)) * bone @ (L_offdiag @ bone)

    mHSIC = 1/m * (1 + mu_X * mu_Y - mu_X - mu_Y)    # mean under H0

    # Gamma shape (al) and scale (bet) matched to the mean and variance above
    al = mHSIC**2 / varHSIC
    bet = varHSIC * m / mHSIC

    # computing 1-alpha threshold
    threshold = gamma.ppf(1-alpha, al, scale=bet)

    return stat, threshold

### Evaluations

In [None]:
# permutation test at level alpha = 0.05 with 5000 permutations;
# a width of -1 selects the median-heuristic kernel width
HSIC_permutations(X, Y, alpha=0.05, width_X=-1, width_Y=-1, shuffle=5000)

In [None]:
# Gamma-approximation test at level alpha = 0.05;
# a width of -1 selects the median-heuristic kernel width
HSIC_gamma(X, Y, alpha=0.05, width_X=-1, width_Y=-1)

## Power estimation

We estimate the statistical power based on 200 replications for each setting. Our experimental settings consist of various dimensions, sample sizes, mean dependencies, and variance dependencies.

In [None]:
# numbers of dimensions to simulate
dims = [1, 5, 10, 25]

# sample sizes: 16, 20, ..., 64 in steps of 4, then 72, 80, ..., 128 in steps of 8
# (kept as floats; consumers cast with int())
sample_sizes = list(4 * np.concatenate((np.linspace(4, 16, 13), np.linspace(18, 32, 8))))

# how many dimensions Y is shifted against X
shift_pars = [0, 1, 2, 3]

# scenario names understood by gen_dependence
tests = [
    'independence',
    'dependence_linear',
    'dependence_sine',
    'dependence_cubic',
    'dependence_quartic',
]

### Power estimation for dependence

In [None]:
# results keyed by (dim, sample size, shift, test); each value is a list of
# (stat, threshold) tuples, one per replication
HSIC_p_m = {}
HSIC_g_m = {}

# replications per setting
n_reps = 200

for dim in dims:
    print('Dimensions:', dim)

    # defining time scales (they depend on dim only, so build them once per dimension)
    time1 = np.linspace(0, dim-1, dim)
    time2 = np.linspace(0, dim-1, dim)

    for sample_size in sample_sizes:
        print('Sample size:', int(sample_size))
        # NOTE: gen_dependence reads the global `shift_par`, so this loop
        # variable must keep its name
        for shift_par in shift_pars:
            print('Shift:', shift_par)
            for test in tests:

                key = (dim, int(sample_size), shift_par, test)

                # A shift of at least `dim` removes every column of X and Y.
                # Both tests then return (nan, nan), so store that directly
                # instead of spending n_reps * 5000 permutations on it.
                if abs(shift_par) >= dim:
                    HSIC_p_m[key] = [(np.nan, np.nan)] * n_reps
                    HSIC_g_m[key] = [(np.nan, np.nan)] * n_reps
                    continue

                HSIC_p_m_list = []
                HSIC_g_m_list = []

                # repeating 200 times
                for i in range(n_reps):

                    # defining dependencies
                    time1d, time2d = gen_dependence(time1, time2, test=test, s=i, sample_size=int(sample_size))

                    # bringing X and Y in same space
                    if shift_par > 0:
                        X = time1d[:, :-shift_par]
                    elif shift_par < 0:
                        X = time1d[:, -shift_par:]
                    else:
                        X = time1d
                    Y = time2d

                    # test level alpha = 0.05, 5000 permutations
                    HSIC_p_m_list.append(HSIC_permutations(X, Y, 0.05, -1, -1, 5000))

                    # test level alpha = 0.05
                    HSIC_g_m_list.append(HSIC_gamma(X, Y, 0.05, -1, -1))

                HSIC_p_m[key] = HSIC_p_m_list
                HSIC_g_m[key] = HSIC_g_m_list

In [None]:
# saving: one pickle per method, with the list of dimensions in the file name.
# `with` closes each file even if pickle.dump raises.
with open('dependencies_p_{}.pkl'.format(dims), 'wb') as dependence_p:
    pickle.dump(HSIC_p_m, dependence_p)

with open('dependencies_g_{}.pkl'.format(dims), 'wb') as dependence_g:
    pickle.dump(HSIC_g_m, dependence_g)