In [1]:
import numpy as np

In [65]:
num_input = 5

x_i = np.linspace(0.1, 0.5, num=num_input)
x_j = np.linspace(0.6, 1.0, num=num_input)

display(x_i)
display(x_j)


array([0.1, 0.2, 0.3, 0.4, 0.5])

array([0.6, 0.7, 0.8, 0.9, 1. ])

In [66]:
# outer product of x_i and x_j to get 100 x 100 matrix
# in the paper sigma was defined entry-wise, but we want to compute the matrix at once
n_0 = 1
beta = 0.1

def calc_sigma_1(x_i, x_j):
    return np.outer(x_i, x_j) / n_0 + beta**2

display(calc_sigma_1(x_i, x_j))

array([[0.07, 0.08, 0.09, 0.1 , 0.11],
       [0.13, 0.15, 0.17, 0.19, 0.21],
       [0.19, 0.22, 0.25, 0.28, 0.31],
       [0.25, 0.29, 0.33, 0.37, 0.41],
       [0.31, 0.36, 0.41, 0.46, 0.51]])

In [67]:
# f(x_i) and f(x_j) follow N(0, cov) where cov is a contatenation of 4 smaller matrices

cov_ii = calc_sigma_1(x_i, x_i)
cov_ij = calc_sigma_1(x_i, x_j)
cov_ji = calc_sigma_1(x_j, x_i)
cov_jj = calc_sigma_1(x_j, x_j)

# they are all equal because x_i == x_j for now

In [68]:
# concatenate

v1 = np.vstack((cov_ii, cov_ji))
v2 = np.vstack((cov_ij, cov_jj))

cov = np.hstack((v1, v2))

display(cov.shape)

(10, 10)

In [69]:
# now sample Y_i, Y_j from this distribution N(0, cov)
mean = [0] * 2 * num_input

# sample once
sample = np.random.multivariate_normal(mean, cov, size=1, check_valid='warn')[0]
f_xi = sample[:num_input]
f_xj = sample[num_input:]
display(f_xi)
display(f_xj)

array([-0.000281  ,  0.03377949,  0.06783998,  0.10190046,  0.13596095])

array([0.17002143, 0.20408192, 0.23814241, 0.2722029 , 0.30626339])

In [70]:
def relu_kth(x, k):
    return np.maximum(x**k, 0)

def relu(x):
    return np.maximum(x, 0)

def d_relu(x):
    return 1.0 * (x > 0)

def d_relu_kth(x, k):
    return k * x**(k-1) * (x > 0)

display(relu(f_xi))
display(d_relu(f_xi))


array([0.        , 0.03377949, 0.06783998, 0.10190046, 0.13596095])

array([0., 1., 1., 1., 1.])

In [71]:
# gaol: approximate expectation using a finite sum and take the mean

In [83]:
# sigma_2 = np.outer(relu(f_xi), relu(f_xj)) + beta**2

# approach 1: sample and compute outer product first, then take mean
def calc_sigma_2(activation):
    num_samples = 100
    sum_N = np.zeros((num_input, num_input))

    for i in range(num_samples):
        sample = np.random.multivariate_normal(mean, cov, size=1)[0]
        f_xi = sample[:num_input]
        f_xj = sample[num_input:]
        sum_N += np.outer(activation(f_xi), activation(f_xj))

    sigma_2 = (sum_N / num_samples) + beta**2
    
    return sigma_2

#     display(sigma_2)

In [84]:
# approach 2: vectorize the sampling and take mean of activated f, then do outer product
def calc_sigma_2(activation):
    s = np.random.multivariate_normal(mean, cov, size=num_samples)
    # s: (num_samples, 10)

    f_xi = s[:, :num_input]
    f_xj = s[:, num_input:]

    # we may take the mean of the relu and then do outer product
    sig_f_xi = np.mean(activation(f_xi), axis=0)
    sig_f_xj = np.mean(activation(f_xj), axis=0)

    sigma_2 = np.outer(sig_f_xi, sig_f_xj) + beta**2
    
    return sigma_2

#     display(sigma_2)

In [85]:
THETA_1 = calc_sigma_1(x_i, x_j) # actually make a copy

sigma_2 = calc_sigma_2(relu)
sigma_2_prime = calc_sigma_2(d_relu)

THETA_2 = np.multiply(THETA_1, sigma_2_prime) + sigma_2

In [89]:
display(THETA_1)
display(THETA_2)

array([[0.07, 0.08, 0.09, 0.1 , 0.11],
       [0.13, 0.15, 0.17, 0.19, 0.21],
       [0.19, 0.22, 0.25, 0.28, 0.31],
       [0.25, 0.29, 0.33, 0.37, 0.41],
       [0.31, 0.36, 0.41, 0.46, 0.51]])

array([[0.04525108, 0.05107664, 0.0560382 , 0.06143176, 0.06683854],
       [0.07499284, 0.08617807, 0.09574331, 0.10611854, 0.11651788],
       [0.10658508, 0.123453  , 0.13790092, 0.15355884, 0.16925252],
       [0.13647174, 0.15871325, 0.17782276, 0.19849827, 0.21922156],
       [0.16742307, 0.1952285 , 0.21914592, 0.24500735, 0.27092859]])