Considering the problem of matrix factorization $Y=W^\top X$, where $Y\in \mathbb{R}^{m\times n}$ is the observed data matrix, $W\in\mathbb{R}^{m\times r}$ and $X\in \mathbb{R}^{n\times r}$ are two latent feature matrices. 

#### Loss

$$
\begin{aligned}
L &= ||Y - W^\top X||_F^2\\
~ &= \text{tr}\left((Y-WX^\top)^\top(Y-WX^\top)\right)\\
~ &= \text{tr}(Y^\top Y)-\text{tr}(XW^\top Y) - \text{tr}(Y^\top WX^\top)+\text{tr}(XW^\top WX^\top)\\
~ &= \text{tr}(Y^\top Y)-\text{tr}(YXW^\top) - \text{tr}(X^\top Y^\top W)+\text{tr}(WX^\top XW^\top)
\end{aligned}
$$

#### Derivative

$$
\frac{\partial L}{\partial X} = -Y^\top W + XW^\top W
$$

$$
\frac{\partial L}{\partial W} = -Y X + WX^\top X
$$

#### Backpropagation

$$
\begin{aligned}
W^{(k+1)} &= W^{(k)} - \eta \frac{\partial L}{\partial W}|_{W^{(k)}}\\
~& = W^{(k)} - \eta (-YX^{(k)}+W^{(k)}X^{(k)\top}X^{(k)})
\end{aligned}
$$



In [2]:
import numpy as np

directory = '../../datasets/Seattle_loop-data-set/'
A = np.load( directory + 'Loop_Seattle_2015_A.npy')
dense_mat = np.load( directory + 'dense_mat.npy')

print('Dataset shape:')
print(dense_mat.shape)

missing_rate = 0.0
# =============================================================================
### Random missing (PM) scenario
### Set the PM scenario by:
rm_random_mat = np.load(directory + 'rm_random_mat.npy')
binary_mat = np.round(rm_random_mat + 0.5 - missing_rate)
# =============================================================================
# =============================================================================
### Non-random missing (CM) scenario
### Set the CM scenario by:
# nm_random_mat = np.load(directory + 'nm_random_mat.npy')
# binary_tensor = np.zeros((dense_mat.shape[0], 61, 288))
# for i1 in range(binary_tensor.shape[0]):
#     for i2 in range(binary_tensor.shape[1]):
#         binary_tensor[i1, i2, :] = np.round(nm_random_mat[i1, i2] + 0.5 - missing_rate)
# binary_mat = binary_tensor.reshape([binary_tensor.shape[0], binary_tensor.shape[1] * binary_tensor.shape[2]])
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)

Dataset shape:
(323, 17568)


In [3]:
rank = 60
dim1, dim2 = sparse_mat.shape
W = np.random.rand(dim1, rank)
X = np.random.rand(dim2, rank)
Maxiter = 10
eta = 0.01
for i in range(Maxiter):
    W = W - eta * (- sparse_mat @ X + W @ X.T @ X)
    X = X - eta * (- sparse_mat.T @ W + X @ W.T @ W)
    print('Loss:')
    print(np.linalg.norm((sparse_mat - W @ X.T), 'fro'))
    print()

Loss:
6.573608615534675e+17

Loss:
8.239645759727858e+93



  if __name__ == '__main__':
  if __name__ == '__main__':


Loss:
inf

Loss:
nan

Loss:
nan

Loss:
nan

Loss:
nan

Loss:
nan

Loss:
nan

Loss:
nan

