In [9]:
"""Verify the gradient derivation of linear autoencoder."""
import torch
import torch.nn as nn
from torch import transpose, mm

In [10]:
# Initialise matrix and vector
# Seed the global RNG first so that "Restart Kernel -> Run All" always draws
# the same Y, W1 and W2 and the gradient check below is reproducible.
torch.manual_seed(0)

N = 6  # length of the data vector Y
M = 5  # length of the hidden code W1 @ Y
Y = torch.rand(N, 1)                        # single sample, stored as a column vector
W1 = torch.rand(M, N, requires_grad=True)   # maps the N-vector to the M-dimensional code
W2 = torch.rand(N, M, requires_grad=True)   # maps the code back to an N-vector

In [11]:
# define loss function in terms of W1 and W2
def _squared_residual(W1, W2):
    """Return (Y - W2 W1 Y)^T (Y - W2 W1 Y) as a 1x1 tensor.

    ``Y`` is read from the notebook's global namespace at call time.
    """
    # Compute the reconstruction residual once and reuse it on both sides of
    # the product instead of evaluating the same expression twice.
    residual = Y - mm(W2, mm(W1, Y))
    return mm(transpose(residual, 0, 1), residual)


def loss_func_W1(W1):
    """Squared reconstruction error as a function of ``W1`` only.

    Args:
        W1: candidate first-layer weight matrix; the global ``W2`` and ``Y``
            are held fixed.

    Returns:
        A 1x1 tensor holding the squared error.
    """
    return _squared_residual(W1, W2)


def loss_func_W2(W2):
    """Squared reconstruction error as a function of ``W2`` only.

    Args:
        W2: candidate second-layer weight matrix; the global ``W1`` and ``Y``
            are held fixed.

    Returns:
        A 1x1 tensor holding the squared error.
    """
    return _squared_residual(W1, W2)

In [12]:
# find the theoretical and numerical solutions of W1 and W2
# Closed-form gradients of L = (Y - W2 W1 Y)^T (Y - W2 W1 Y):
#   dL/dW1 = 2 W2^T (W2 W1 - I) Y Y^T
#   dL/dW2 = 2 (W2 W1 - I) Y Y^T W1^T
# They are plain reference values, so evaluate them under no_grad to avoid
# recording an autograd graph through W1 and W2.
with torch.no_grad():
    recon_minus_identity = mm(W2, W1) - torch.eye(N)
    outer_yy = mm(Y, transpose(Y, 0, 1))
    grad_w1_true = 2*mm(mm(transpose(W2, 0, 1), recon_minus_identity), outer_yy)
    grad_w2_true = 2*mm(mm(recon_minus_identity, outer_yy), transpose(W1, 0, 1))

# jacobian() returns shape output.shape + input.shape. The loss is a 1x1
# tensor, so drop the leading singleton axes explicitly rather than relying on
# broadcasting in the subtraction below.
grad_w1 = torch.autograd.functional.jacobian(loss_func_W1, W1).reshape(W1.shape)
grad_w2 = torch.autograd.functional.jacobian(loss_func_W2, W2).reshape(W2.shape)

norm_diff_w1 = torch.linalg.matrix_norm(grad_w1-grad_w1_true)
norm_diff_w2 = torch.linalg.matrix_norm(grad_w2-grad_w2_true)

print("The matrix norm of the difference between theoretical and numerical solutions of W1:", norm_diff_w1.item())
print("The matrix norm of the difference between theoretical and numerical solutions of W2:", norm_diff_w2.item())

# Fail loudly if the derivation and autograd disagree beyond float32 round-off.
assert torch.allclose(grad_w1, grad_w1_true, rtol=1e-4, atol=1e-4), "dL/dW1 derivation does not match autograd"
assert torch.allclose(grad_w2, grad_w2_true, rtol=1e-4, atol=1e-4), "dL/dW2 derivation does not match autograd"

The matrix norm of the difference between theoretical and numerical solutions of W1: 5.540347046917304e-06
The matrix norm of the difference between theoretical and numerical solutions of W2: 3.0439246074820403e-06


In [31]:
# gradient check for multiple samples
# Initialise matrix and vector
# Seed the global RNG so this section draws the same X, W1 and W2 on every
# "Restart Kernel -> Run All".
torch.manual_seed(0)

n = 6  # number of features per sample (columns of X)
p = 5  # size of the hidden code
m = 4  # number of samples (rows of X)

X = torch.rand(m, n)                        # one sample per row
W1 = torch.rand(p, n, requires_grad=True)   # maps n features to the p-dimensional code
W2 = torch.rand(n, p, requires_grad=True)   # maps the code back to n features

In [40]:
# define loss function in terms of W1 and W2
# here we use two ways to implement loss
def loss_func_W1(W1):
    """Per-sample-averaged squared reconstruction error as a function of ``W1``.

    Hand-written form: sum of squared entries of ``X W1^T W2^T - X`` divided by
    the number of samples ``m``. ``X``, ``W2`` and ``m`` are read from the
    notebook's global namespace.

    Args:
        W1: candidate first-layer weight matrix.

    Returns:
        A 0-dim tensor holding the loss.
    """
    z = mm(X, mm(transpose(W1, 0, 1), transpose(W2, 0, 1))) - X
    # torch.sum reduces the whole tensor in one call; the Python builtin
    # sum(sum(...)) would iterate over the tensor row by row instead.
    return torch.sum(z * z) / m


def loss_func_W2(W2):
    """Per-sample-averaged squared reconstruction error as a function of ``W2``.

    Library form of the same quantity. ``nn.MSELoss()`` averages over every
    entry of the (m, n) prediction, so multiplying by the feature count ``n``
    turns that into "sum of squares divided by m". ``X``, ``W1`` and ``n`` are
    read from the notebook's global namespace.

    Args:
        W2: candidate second-layer weight matrix.

    Returns:
        A 0-dim tensor holding the loss.
    """
    return nn.MSELoss()(mm(X, mm(transpose(W1, 0, 1), transpose(W2, 0, 1))), X) * n

In [41]:
# find the theoretical and numerical solutions of W1 and W2
# Closed-form gradients of L = (1/m) * sum of squared entries of (X W1^T W2^T - X):
#   dL/dW1 = (2/m) W2^T (W2 W1 - I) X^T X
#   dL/dW2 = (2/m) (W2 W1 - I) X^T X W1^T
# They are plain reference values, so evaluate them under no_grad to avoid
# recording an autograd graph through W1 and W2.
with torch.no_grad():
    recon_minus_identity = mm(W2, W1) - torch.eye(n)
    gram_x = mm(transpose(X, 0, 1), X)
    grad_w1_true = 2 * mm(mm(transpose(W2, 0, 1), recon_minus_identity), gram_x) / m
    grad_w2_true = 2 * mm(mm(recon_minus_identity, gram_x), transpose(W1, 0, 1)) / m

# jacobian() returns shape output.shape + input.shape; reshaping to the weight
# shape makes the comparison independent of whether the loss is 0-dim or 1x1.
grad_w1 = torch.autograd.functional.jacobian(loss_func_W1, W1).reshape(W1.shape)
grad_w2 = torch.autograd.functional.jacobian(loss_func_W2, W2).reshape(W2.shape)

norm_diff_w1 = torch.linalg.matrix_norm(grad_w1-grad_w1_true)
norm_diff_w2 = torch.linalg.matrix_norm(grad_w2-grad_w2_true)

print("The matrix norm of the difference between theoretical and numerical solutions of W1:", norm_diff_w1.item())
print("The matrix norm of the difference between theoretical and numerical solutions of W2:", norm_diff_w2.item())

# Fail loudly if the derivation and autograd disagree beyond float32 round-off.
assert torch.allclose(grad_w1, grad_w1_true, rtol=1e-4, atol=1e-4), "dL/dW1 derivation does not match autograd"
assert torch.allclose(grad_w2, grad_w2_true, rtol=1e-4, atol=1e-4), "dL/dW2 derivation does not match autograd"

The matrix norm of the difference between theoretical and numerical solutions of W1: 6.468133960879641e-06
The matrix norm of the difference between theoretical and numerical solutions of W2: 5.820539172418648e-06
