In [1]:
import numpy as np

In [55]:
def rbm_phi(s, a, b, W):
    """Evaluate the RBM amplitude for a single visible configuration.

    The value returned is ``exp(a . s) * prod_i 2*cosh(b[i] + W[i] . s)``,
    where ``i`` runs over ``range(len(b))``.

    Parameters
    ----------
    s : visible configuration; dotted with ``a`` and with every row ``W[i]``.
    a : visible biases.
    b : hidden biases; ``len(b)`` fixes how many cosh factors are multiplied.
    W : couplings, indexed as ``W[i]`` for each hidden index ``i``.

    Returns
    -------
    The amplitude (a NumPy scalar for 1-D inputs).
    """
    visible_exponent = np.dot(a, s)

    # One factor 2*cosh(.) per hidden unit, collected first and multiplied after.
    hidden_factors = [
        2 * np.cosh(b[j] + np.dot(W[j], s)) for j in range(len(b))
    ]
    hidden_product = 1
    for factor in hidden_factors:
        hidden_product *= factor

    return np.exp(visible_exponent) * hidden_product


def rbm_phi_flat(s, a, b, W):
    """Evaluate ``rbm_phi`` when the couplings arrive as a flat sequence.

    ``W`` is reshaped to ``(len(b), len(s))`` and then passed, together with
    the unchanged ``s``, ``a`` and ``b``, to ``rbm_phi``.
    """
    weight_shape = (len(b), len(s))
    weight_matrix = np.reshape(W, weight_shape)
    return rbm_phi(s, a, b, weight_matrix)

In [12]:
def o_der_a(s, a, b, W):
    """Log-derivative of ``rbm_phi`` with respect to the visible biases ``a``.

    ``rbm_phi`` depends on ``a`` only through the factor ``exp(a . s)``, so
    ``d log(phi) / d a`` is ``s``. Multiply by ``rbm_phi(s, a, b, W)`` to get
    the derivative of the amplitude itself.

    ``a``, ``b`` and ``W`` are unused; they are accepted so that all
    ``o_der_*`` functions share one signature.
    """
    # Returns the very same object that was passed in, not a copy.
    return s

In [13]:
def o_der_b(s, a, b, W):
    """Log-derivative of ``rbm_phi`` with respect to the hidden biases ``b``.

    Entry ``j`` of the result is ``tanh(b[j] + W[j] . s)``; the result has
    one entry per element of ``b``. ``a`` is unused.
    """
    per_hidden = []
    for j in range(len(b)):
        activation = b[j] + np.dot(W[j], s)
        per_hidden.append(np.tanh(activation))
    return np.array(per_hidden)

In [33]:
def o_der_W(s, a, b, W):
    """Log-derivative of ``rbm_phi`` with respect to the couplings ``W``.

    Entry ``[j][k]`` of the result is ``s[k] * tanh(b[j] + W[j] . s)``, with
    ``j`` ranging over the rows of ``W`` and ``k`` over ``range(len(W[0]))``.
    ``a`` is unused.
    """
    rows = []
    for j in range(len(W)):
        row = []
        # len(W[0]) is looked up inside the row loop, so an empty W yields an
        # empty result without ever touching W[0].
        for k in range(len(W[0])):
            row.append(s[k] * np.tanh(b[j] + np.dot(W[j], s)))
        rows.append(row)
    return np.array(rows)


In [59]:
# Let's test the derivative functions by comparing them to finite difference approximations.

def finite_diff(f, x, i, h):
    """Central-difference estimate of the partial derivative of ``f``.

    Computes ``(f(x + h*e_i) - f(x - h*e_i)) / (2*h)``, where ``e_i`` is the
    unit vector along component ``i``.

    Parameters
    ----------
    f : callable taking a 1-D array of the same length as ``x``.
    x : point at which to differentiate (1-D array or sequence).
    i : index of the component to perturb.
    h : step size.

    Returns
    -------
    Whatever ``f`` returns, differenced and divided by ``2*h``.
    """
    # Build h*e_i directly. Slicing one row out of np.eye(len(x)) allocated a
    # full len(x)-by-len(x) matrix twice on every call.
    step = np.zeros(len(x))
    step[i] = h
    return (f(x + step) - f(x - step)) / (2 * h)

def finite_diff_grad(f, x, h):
    """Approximate the gradient of ``f`` at ``x`` by central differences.

    Calls ``finite_diff`` once per component of ``x`` with step ``h`` and
    stacks the results into an array.
    """
    partials = []
    for component in range(len(x)):
        partials.append(finite_diff(f, x, component, h))
    return np.array(partials)

def test_derivatives():
    """Check the analytical derivatives of ``rbm_phi`` against finite differences.

    Uses a fixed 2-visible / 2-hidden example. The ``o_der_*`` functions
    return log-derivatives, so each is multiplied by the amplitude before
    being compared with a central finite difference of ``rbm_phi``.

    Prints both sets of derivatives and raises ``AssertionError`` if they
    disagree beyond the tolerance.
    """
    a = np.array([0.1, 0.2])
    b = np.array([0.3, 0.4])
    W = np.array([[0.5, 0.6], [0.7, 0.8]])
    s = np.array([1, 0])

    phi = rbm_phi(s, a, b, W)
    print("RBM values:")
    print(phi)

    h = 1e-6

    analytic_a = o_der_a(s, a, b, W) * phi
    analytic_b = o_der_b(s, a, b, W) * phi
    # Flatten the product, not the scalar amplitude, so the result lines up
    # with the flat finite-difference gradient taken over W.flatten().
    analytic_W = (o_der_W(s, a, b, W) * phi).flatten()

    print("Analytical derivatives:")
    print(analytic_a)
    print(analytic_b)
    print(analytic_W)

    numeric_a = finite_diff_grad(lambda x: rbm_phi(s, x, b, W), a, h)
    numeric_b = finite_diff_grad(lambda x: rbm_phi(s, a, x, W), b, h)
    numeric_W = finite_diff_grad(lambda x: rbm_phi_flat(s, a, b, x), W.flatten(), h)

    print("Finite difference derivatives:")
    print(numeric_a)
    print(numeric_b)
    print(numeric_W)

    # Turn the visual comparison into an actual check. atol covers the
    # components that are exactly zero because s[1] == 0.
    np.testing.assert_allclose(analytic_a, numeric_a, rtol=1e-6, atol=1e-8)
    np.testing.assert_allclose(analytic_b, numeric_b, rtol=1e-6, atol=1e-8)
    np.testing.assert_allclose(analytic_W, numeric_W, rtol=1e-6, atol=1e-8)



In [60]:
# Run the analytical-vs-finite-difference derivative comparison.
test_derivatives()

RBM values:
9.864910437871492
Analytical derivatives:
[9.86491044 0.        ]
[6.55066327 7.89685116]
[[6.55066327 0.        ]
 [7.89685116 0.        ]]
Finite difference derivatives:
[9.86491044 0.        ]
[6.55066327 7.89685115]
[6.55066327 0.         7.89685115 0.        ]


In [65]:
def rbm_phi_theta(s, theta):
    """Evaluate ``rbm_phi`` from a single packed parameter vector.

    ``theta`` is laid out as ``[a (n values), b (m values), W flattened
    (m*n values)]`` with ``n = len(s)``, so ``len(theta) == n + m*(n + 1)``.
    ``W`` is restored with shape ``(m, n)``, the same ``(len(b), len(s))``
    convention used by ``rbm_phi_flat``.

    Raises
    ------
    ValueError
        If ``len(theta)`` is not of the form ``n + m*(n + 1)``.
    """
    n = len(s)
    # Solve len(theta) = n + m*(n + 1) for m. The earlier expression
    # len(theta)//(n+1) - 1 came out one too small.
    m, leftover = divmod(len(theta) - n, n + 1)
    if m < 0 or leftover != 0:
        raise ValueError(
            "theta of length %d cannot be split into a, b and W for %d visible units"
            % (len(theta), n)
        )
    a = theta[:n]
    b = theta[n:n + m]
    # One row per hidden unit, because rbm_phi evaluates np.dot(W[i], s).
    W = np.reshape(theta[n + m:], (m, n))
    return rbm_phi(s, a, b, W)

def o_der_theta(s, theta):
    """Log-derivatives of ``rbm_phi`` with respect to every packed parameter.

    ``theta`` is laid out as ``[a (n values), b (m values), W flattened
    (m*n values)]`` with ``n = len(s)``, so ``len(theta) == n + m*(n + 1)``.
    The result is the concatenation of ``o_der_a``, ``o_der_b`` and the
    flattened ``o_der_W``, in that same order.

    Raises
    ------
    ValueError
        If ``len(theta)`` is not of the form ``n + m*(n + 1)``.
    """
    n = len(s)
    # Solve len(theta) = n + m*(n + 1) for m. The earlier expression
    # len(theta)//(n+1) - 1 came out one too small.
    m, leftover = divmod(len(theta) - n, n + 1)
    if m < 0 or leftover != 0:
        raise ValueError(
            "theta of length %d cannot be split into a, b and W for %d visible units"
            % (len(theta), n)
        )
    a = theta[:n]
    b = theta[n:n + m]
    # One row per hidden unit, because the o_der_* functions evaluate
    # np.dot(W[i], s).
    W = np.reshape(theta[n + m:], (m, n))
    return np.concatenate(
        [o_der_a(s, a, b, W), o_der_b(s, a, b, W), o_der_W(s, a, b, W).flatten()]
    )




In [None]:
def create_random_state(N, M):
    """Draw a random visible configuration and random RBM parameters.

    Parameters
    ----------
    N : number of visible units.
    M : number of hidden units.

    Returns
    -------
    s : integer array of length ``N`` with entries in {0, 1}.
    a : array of length ``N`` drawn uniformly from [0, 1).
    b : array of length ``M`` drawn uniformly from [0, 1).
    W : array of shape ``(M, N)`` drawn uniformly from [0, 1).
    """
    s = np.random.randint(0, 2, N)
    a = np.random.rand(N)
    b = np.random.rand(M)
    # One row per hidden unit: rbm_phi and the o_der_* functions take
    # np.dot(W[i], s) for i in range(len(b)), which needs M rows of length N.
    # The previous (N, M) layout only worked when N == M.
    W = np.random.rand(M, N)

    return s, a, b, W

# Draw a small 2-visible / 2-hidden example.
random_s, random_a, random_b, random_W = create_random_state(2, 2)
# Pack the parameters as [a, b, flattened W], the order in which
# rbm_phi_theta and o_der_theta slice them back out.
random_theta = np.concatenate([random_a, random_b, random_W.flatten()])


In [None]:
def loss(theta, s, target=0.0):
    """Half squared error between the RBM amplitude and a reference value.

    Returns ``sum(0.5 * (rbm_phi_theta(s, theta) - target)**2)``.

    Parameters
    ----------
    theta : packed parameter vector, passed to ``rbm_phi_theta``.
    s : visible configuration.
    target : value the amplitude is compared against.

    NOTE(review): the original expression ended in ``- )`` with no right-hand
    operand, which is a syntax error. The reference value is now an explicit
    argument; the default of 0.0 is a placeholder, so confirm the intended
    target and pass it explicitly.
    """
    residual = rbm_phi_theta(s, theta) - target
    return np.sum(0.5 * residual ** 2)

In [63]:


def gradian_descent(s, theta, learning_rate, max_iter):
    """Take ``max_iter`` fixed-size steps against ``o_der_theta``.

    Each iteration performs
    ``theta <- theta - learning_rate * o_der_theta(s, theta)``.

    Parameters
    ----------
    s : visible configuration passed to ``o_der_theta``.
    theta : starting parameter vector; it is not modified.
    learning_rate : step size.
    max_iter : number of update steps.

    Returns
    -------
    A new float array holding the updated parameters.

    NOTE(review): the step direction is ``o_der_theta`` (the log-derivative
    of the amplitude), not the gradient of ``loss``. Confirm that this is the
    objective that is meant to be minimised.
    """
    # Work on a float copy. The in-place `theta -= ...` used to overwrite the
    # caller's array, so re-running the cell kept drifting, and it raised for
    # integer-dtype input.
    theta = np.array(theta, dtype=float)
    for _ in range(max_iter):
        theta -= learning_rate * o_der_theta(s, theta)
    return theta

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (4, 2) + inhomogeneous part.