In [1]:
import numpy as np

In [2]:
"""
1d input x_i and x_j, alias to x and x' in the paper
for 2d input we should use np.meshgrid or np.mgrid
"""

num_input = 50

# x_i = np.linspace(0.0, 1.0, num=num_input)
# x_j = np.linspace(0.1, 0.5, num=num_input)

x_i = np.random.uniform(size=num_input)
x_j = np.random.uniform(size=num_input)

# display(x_i)
# display(x_j)


In [3]:
# take outer product of x_i and x_j to get 100 x 100 matrix
# in the paper sigma was defined entry-wise, but we want to compute the matrix at once
n_0 = 1
beta = 1

def calc_sigma_1(x_i, x_j):
    return np.outer(x_i, x_j) / n_0 + beta**2

display(calc_sigma_1(x_i, x_j))

array([[1.40085623, 1.04720925, 1.4505732 , ..., 1.26952259, 1.37277366,
        1.69758009],
       [1.06741091, 1.00793905, 1.07577167, ..., 1.04532488, 1.06268834,
        1.11731015],
       [1.38338023, 1.04515108, 1.43092971, ..., 1.2577723 , 1.35652197,
        1.66716792],
       ...,
       [1.35785584, 1.04214505, 1.40223961, ..., 1.24061054, 1.33278573,
        1.62274973],
       [1.04503299, 1.00530358, 1.05061829, ..., 1.0302787 , 1.04187813,
        1.07836753],
       [1.37340972, 1.04397684, 1.41972258, ..., 1.25106845, 1.34724995,
        1.64981697]])

In [42]:
"""
f(x_i) and f(x_j) follow N(0, cov) where cov is a contatenation of 4 smaller matrices
"""

cov_ii = calc_sigma_1(x_i, x_i)
cov_ij = calc_sigma_1(x_i, x_j)
cov_ji = calc_sigma_1(x_j, x_i)
cov_jj = calc_sigma_1(x_j, x_j)

# cov_ij == cov_ji.T

In [46]:
a = np.outer(x_i, x_j)

a == a.T

array([[ True, False, False, ..., False, False, False],
       [False,  True, False, ..., False, False, False],
       [False, False,  True, ..., False, False, False],
       ...,
       [False, False, False, ...,  True, False, False],
       [False, False, False, ..., False,  True, False],
       [False, False, False, ..., False, False,  True]])

In [5]:
# is_pos_def(cov_ij)

In [6]:
# cov_ij

In [7]:
# cov_ji

In [8]:
# cov_ij == cov_ji.T

In [9]:
# concatenate the 4 sigmas

v1 = np.vstack((cov_ii, cov_ji))
v2 = np.vstack((cov_ij, cov_jj))

cov = np.hstack((v1, v2))

display(cov.shape)

(100, 100)

In [38]:
cov == cov.T

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])

In [11]:
def is_pos_def(x):
    return np.all(np.linalg.eigvals(x) > 0)

is_pos_def(cov)

False

In [12]:
np.linalg.eigvals(cov)

array([ 1.25135729e+02+0.00000000e+00j,  7.01407530e+00+0.00000000e+00j,
       -3.32162507e-15+0.00000000e+00j,  1.58465248e-15+1.42271122e-15j,
        1.58465248e-15-1.42271122e-15j,  1.33824908e-15+0.00000000e+00j,
        1.24225548e-15+0.00000000e+00j, -1.24663725e-15+0.00000000e+00j,
       -1.21007595e-15+0.00000000e+00j,  1.17789355e-15+0.00000000e+00j,
        1.14842867e-15+0.00000000e+00j, -1.13592417e-15+0.00000000e+00j,
        1.06904482e-15+0.00000000e+00j,  1.04240352e-15+0.00000000e+00j,
        1.03287867e-15+0.00000000e+00j, -1.01255712e-15+1.75344704e-16j,
       -1.01255712e-15-1.75344704e-16j, -1.06333400e-15+2.18923323e-17j,
       -1.06333400e-15-2.18923323e-17j,  9.79125240e-16+0.00000000e+00j,
       -1.02839397e-15+0.00000000e+00j,  9.25573675e-16+1.13022766e-17j,
        9.25573675e-16-1.13022766e-17j, -9.76364544e-16+0.00000000e+00j,
       -9.59978502e-16+0.00000000e+00j, -9.17892084e-16+0.00000000e+00j,
        8.12326662e-16+1.34874232e-16j,  8.12326662

In [13]:
"""
now sample Y_i, Y_j from this distribution N(0, cov)
"""
mean = [0] * 2 * num_input

# sample once
# np.random.seed(26)
sample = np.random.multivariate_normal(mean, cov, size=1, check_valid='warn')
# f_xi = sample[:num_input]
# f_xj = sample[num_input:]
# display(f_xi)
# display(f_xj)
sample

array([[-1.77799261, -0.45758656, -1.70878971, -1.16565954, -1.30369325,
        -0.85885433, -0.78902215, -0.29230709, -1.73952806, -0.37796309,
        -0.22726201, -0.79005544, -1.77534182, -1.34848445, -1.75796255,
        -0.98930746, -1.44032258, -1.00928676, -1.9108657 , -0.9785132 ,
        -0.55087345, -1.47250961, -1.5967266 , -0.42519045, -1.10795561,
        -0.75800948, -0.22690504, -1.50753165, -0.7908904 , -0.80660598,
        -0.77390254, -0.67044183, -0.6600855 , -0.71347881, -0.27787209,
        -0.91443994, -0.85345118, -0.51936057, -1.89896637, -0.25455689,
        -1.49275172, -1.57601327, -0.23977018, -0.97632957, -1.14994963,
        -0.32361097, -0.40111946, -1.60771596, -0.36897216, -1.66930757,
        -1.03076011, -0.28958769, -1.13495703, -1.11866893, -1.54573889,
        -0.25774912, -0.53543113, -1.99604878, -0.69887071, -1.55440807,
        -1.026216  , -0.47032781, -1.18331139, -1.37004399, -1.7265353 ,
        -0.9582585 , -1.68355294, -1.72793893, -0.8

In [23]:
# gaol: approximate expectation using a finite sum and take the mean
# approach 1: sample and compute outer product first, then take mean
def calc_sigma_2_v1(activation):
    num_samples = 1000
    sum_N = np.zeros((num_input, num_input))
    np.random.seed(26)
    for i in range(num_samples):
        sample = np.random.multivariate_normal(mean, cov, size=1)[0]
        f_xi = sample[:num_input]
        f_xj = sample[num_input:]
        sum_N += np.outer(activation(f_xi), activation(f_xj))

    sigma_2 = (sum_N / num_samples) + beta**2
    
    return sigma_2

#     display(sigma_2)

In [25]:
# # approach 2: vectorize the sampling and take mean of activated f, then do outer product
# def calc_sigma_2_v2(activation):
#     num_samples = 1000
#     np.random.seed(26)
#     s = np.random.multivariate_normal(mean, cov, size=num_samples)
#     # s: (num_samples, 10)

#     f_xi = s[:, :num_input]
#     f_xj = s[:, num_input:]

#     # we may take the mean of the relu and then do outer product
#     sig_f_xi = np.mean(activation(f_xi), axis=0)
#     sig_f_xj = np.mean(activation(f_xj), axis=0)

#     sigma_2 = np.outer(sig_f_xi, sig_f_xj) + beta**2
    
#     return sigma_2

# #     display(sigma_2)

In [26]:
"""
activation functions and their derivatives
"""

def relu_kth(x, k):
    return np.maximum(x**k, 0)

def relu(x):
    return np.maximum(x, 0)

def d_relu(x):
    return 1.0 * (x > 0)

def d_relu_kth(x, k):
    return k * x**(k-1) * (x > 0)

# display(relu(f_xi))
# display(d_relu(f_xi))


In [28]:
calc_sigma_2_v1(relu).shape

(50, 50)

In [None]:
# calc_sigma_2_v2(relu)

In [29]:
"""
Compute Theta 1 and 2 using Sigma
"""

THETA_1 = calc_sigma_1(x_i, x_j) # actually make a copy

sigma_2 = calc_sigma_2_v1(relu)
sigma_2_prime = calc_sigma_2_v1(d_relu)

THETA_2 = np.multiply(THETA_1, sigma_2_prime) + sigma_2

In [30]:
display(THETA_1.shape)
display(THETA_2.shape)

(50, 50)

(50, 50)

In [31]:
np.linalg.eigvals(THETA_2)

array([ 1.65156929e+02+0.00000000e+00j,  2.74651485e-01+3.84616308e-01j,
        2.74651485e-01-3.84616308e-01j,  7.30260702e-02+0.00000000e+00j,
        1.65396762e-02+1.18135828e-02j,  1.65396762e-02-1.18135828e-02j,
       -7.77309527e-03+1.20775122e-02j, -7.77309527e-03-1.20775122e-02j,
        1.40435643e-02+0.00000000e+00j, -1.16361593e-02+0.00000000e+00j,
        1.02077089e-02+0.00000000e+00j, -4.24716599e-04+4.67794469e-03j,
       -4.24716599e-04-4.67794469e-03j,  3.50258610e-03+1.94275917e-03j,
        3.50258610e-03-1.94275917e-03j, -7.69532604e-04+3.21898567e-03j,
       -7.69532604e-04-3.21898567e-03j,  1.87578375e-03+1.17497806e-03j,
        1.87578375e-03-1.17497806e-03j, -1.81747341e-03+0.00000000e+00j,
       -3.27506278e-04+1.58929462e-03j, -3.27506278e-04-1.58929462e-03j,
       -1.18896250e-03+0.00000000e+00j,  8.91068061e-04+0.00000000e+00j,
        8.51939046e-05+5.06116329e-04j,  8.51939046e-05-5.06116329e-04j,
        1.20788983e-04+0.00000000e+00j, -7.62752948

In [33]:
THETA_2 == THETA_2.T

array([[ True, False, False, ..., False, False, False],
       [False,  True, False, ..., False, False, False],
       [False, False,  True, ..., False, False, False],
       ...,
       [False, False, False, ...,  True, False, False],
       [False, False, False, ..., False,  True, False],
       [False, False, False, ..., False, False,  True]])

In [36]:
sigma_2 == sigma_2.T

array([[ True, False, False, ..., False, False, False],
       [False,  True, False, ..., False, False, False],
       [False, False,  True, ..., False, False, False],
       ...,
       [False, False, False, ...,  True, False, False],
       [False, False, False, ..., False,  True, False],
       [False, False, False, ..., False, False,  True]])

In [37]:
calc_sigma_1(x_i, x_j)

array([[1.40085623, 1.04720925, 1.4505732 , ..., 1.26952259, 1.37277366,
        1.69758009],
       [1.06741091, 1.00793905, 1.07577167, ..., 1.04532488, 1.06268834,
        1.11731015],
       [1.38338023, 1.04515108, 1.43092971, ..., 1.2577723 , 1.35652197,
        1.66716792],
       ...,
       [1.35785584, 1.04214505, 1.40223961, ..., 1.24061054, 1.33278573,
        1.62274973],
       [1.04503299, 1.00530358, 1.05061829, ..., 1.0302787 , 1.04187813,
        1.07836753],
       [1.37340972, 1.04397684, 1.41972258, ..., 1.25106845, 1.34724995,
        1.64981697]])