In [1]:
import numpy as np
from mmdew import mmd
from sklearn.metrics.pairwise import rbf_kernel as k

In [2]:
def inv(A, l=1e-8):
    """Return the inverse of ``A`` with ``l`` added to every diagonal entry.

    The ridge shifts each eigenvalue of ``A`` by ``l``, so a positive
    semi-definite input (e.g. a kernel matrix) becomes strictly positive
    definite before it is inverted.

    Parameters
    ----------
    A : square array-like
        Matrix to invert.
    l : float, optional
        Value added to the diagonal before inversion (default ``1e-8``).

    Returns
    -------
    numpy.ndarray
        The inverse of ``A + l * I``.
    """
    ridge = l * np.eye(len(A))
    return np.linalg.inv(A + ridge)

In [3]:
g = 1 # RBF kernel parameter; passed as `gamma` to every kernel and MMD call in this notebook
n = 64 # full sample size: number of points drawn for each of X and Y
n_p = 8 # size of subsample 1: number of indices drawn from the full sample
n_pp = 3 # size of subsample 2: number of indices drawn from subsample 1

In [4]:
# Seeded generator: every sample and index draw in this notebook goes through it,
# so a fresh top-to-bottom run reproduces the same numbers.
rng = np.random.default_rng(1234)

In [5]:
# Two samples of n scalar observations each (shape (n, 1)), both drawn from the
# same standard normal distribution.
X = rng.normal(size=(n,1))
Y = rng.normal(size=(n,1))
# => the population MMD between the two underlying distributions is 0;
#    an estimate computed from the finite samples X and Y is generally not exactly 0.

In [6]:
# Reference value: mmdew's estimator (configured with biased=True and the same
# gamma used for the kernels) evaluated on the full samples.
mmd_estimator = mmd.MMD(biased=True, gamma=g)
mmd_ground_truth = mmd_estimator.mmd(X, Y)
print(mmd_ground_truth)

0.027183315073836756


## Subsample 1

In [7]:
# Row indices of subsample 1; the same indices are used to select from X and from Y.
# NOTE(review): replace=True allows the same index to be drawn more than once.
# Repeated rows make the subsample kernel matrix singular, so its inversion then
# depends entirely on the ridge term in inv(). Confirm that sampling with
# replacement is intended.
idx_p = rng.choice(np.arange(n), size=n_p, replace=True)

In [8]:
# Nyström weights for subsample 1 (m = n_p subsampled points):
#     alpha = (1/n) * (K_mm + ridge)^{-1} @ K_mn @ 1_n
# K_mm is the kernel matrix among the subsampled points, K_mn the kernel matrix
# between the subsampled points and the full sample, and 1_n the all-ones column.
# The resulting (n_p, 1) vector weights the subsampled points.
one_n = np.ones((n,1))

# for X
kX_mm = k(X[idx_p],gamma=g)
kX_mn = k(X[idx_p],X,gamma=g)
alpha_X = 1/n * inv(kX_mm) @ kX_mn @ one_n

# for Y
kY_mm = k(Y[idx_p],gamma=g)
kY_mn = k(Y[idx_p],Y,gamma=g)
alpha_Y = 1/n * inv(kY_mm) @ kY_mn @ one_n

In [9]:
# With mu_X = sum_i alpha_X[i] * phi(X[idx_p][i]) (and likewise mu_Y), the
# quantity A + B - 2C is the squared RKHS distance ||mu_X - mu_Y||^2:
#   A = <mu_X, mu_X>,  B = <mu_Y, mu_Y>,  C = <mu_X, mu_Y>.
# NOTE(review): this is a squared distance; confirm that mmd_ground_truth is on
# the same scale before interpreting the error.
kXY_p = k(X[idx_p], Y[idx_p], gamma=g)  # cross-kernel between the two subsamples
A = alpha_X.T @ kX_mm @ alpha_X
B = alpha_Y.T @ kY_mm @ alpha_Y
C = alpha_X.T @ kXY_p @ alpha_Y
mmd_nys_1 = (A + B - 2 * C)[0,0]  # each term is 1x1; extract the scalar

abs_err_1 = abs(mmd_nys_1 - mmd_ground_truth)
print("n:\t\t\t", n)
print("Nyström sample size:\t", n_p)
print("Abs. error:\t\t", abs_err_1)

n:			 64
Nyström sample size:	 8
Abs. error:		 0.004425301593830699


## Subsample 2

In [10]:
# Indices of subsample 2, drawn from the indices already in subsample 1, so
# subsample 2 only contains points that are also in subsample 1.
# NOTE(review): replace=True allows repeated indices (and idx_p itself may
# contain repeats); duplicates make the subsample kernel matrix singular, so its
# inversion depends on the ridge term in inv(). Confirm this is intended.
idx_pp = rng.choice(idx_p, size=n_pp, replace=True)

In [11]:
# Second-level Nyström weights: instead of starting from uniform 1/n weights on
# the full sample, project the already computed subsample-1 weight vectors
# (alpha_X / alpha_Y, = "zeta") onto the points of subsample 2.
# Note: this rebinds kX_mm, kX_mn, kY_mm and kY_mn, which previously held the
# subsample-1 kernel matrices.

# for X
X_pp, X_p = X[idx_pp], X[idx_p]
kX_mm = k(X_pp, gamma=g)         # kernel among subsample-2 points
kX_mn = k(X_pp, X_p, gamma=g)    # kernel between subsample 2 and subsample 1
alpha_X_pp = inv(kX_mm) @ kX_mn @ alpha_X

# for Y
Y_pp, Y_p = Y[idx_pp], Y[idx_p]
kY_mm = k(Y_pp, gamma=g)
kY_mn = k(Y_pp, Y_p, gamma=g)
alpha_Y_pp = inv(kY_mm) @ kY_mn @ alpha_Y

In [12]:
# Same A + B - 2C expansion as for subsample 1, now with the subsample-2 weights
# and kernel matrices: the squared RKHS distance between the two weighted
# embeddings supported on the subsample-2 points.
kXY_pp = k(X[idx_pp], Y[idx_pp], gamma=g)  # cross-kernel between the two subsamples
A = alpha_X_pp.T @ kX_mm @ alpha_X_pp
B = alpha_Y_pp.T @ kY_mm @ alpha_Y_pp
C = alpha_X_pp.T @ kXY_pp @ alpha_Y_pp
mmd_nys_2 = (A + B - 2 * C)[0,0]  # each term is 1x1; extract the scalar

err_to_truth = abs(mmd_nys_2 - mmd_ground_truth)
err_to_sample_1 = abs(mmd_nys_2 - mmd_nys_1)
print("n:\t\t\t", n)
print("Nyström sample size:\t\t", n_p)
print("Nyström sample sample size:\t", n_pp)
print("Abs. error ground truth:\t", err_to_truth)
print("Abs. error to sample 1:\t\t", err_to_sample_1)

n:			 64
Nyström sample size:		 8
Nyström sample sample size:	 3
Abs. error ground truth:	 0.24699117076919908
Abs. error to sample 1:		 0.24256586917536838
