In [58]:
import numpy as np


def loss(U, M, R, reg=0.0):
    """Regularized squared-error loss of the factorization ``U . M^T ~= R``.

    Computes::

        0.5 * sum((U . M^T - R) ** 2) + (reg / 2) * (sum(U ** 2) + sum(M ** 2))

    Parameters
    ----------
    U : ndarray, shape (num_user, lat_dim)
        User factor matrix.
    M : ndarray, shape (num_movie, lat_dim)
        Movie factor matrix.
    R : ndarray
        Target matrix; must be subtractable from ``np.dot(U, M.T)``.
    reg : float, optional
        L2 regularization strength. The default 0.0 disables the penalty.

    Returns
    -------
    float
        The scalar loss value.
    """
    diff = np.dot(U, M.T) - R
    total = 0.5 * np.sum(diff * diff)
    # Penalize squared magnitudes. A plain np.sum(U) is signed, so it would
    # reward large negative factors and make the loss unbounded below.
    total += 0.5 * reg * (np.sum(U * U) + np.sum(M * M))
    return total


def compute_grad(U, M, R):
    """Analytic gradient of ``0.5 * sum((U . M^T - R) ** 2)`` w.r.t. U and M.

    No regularization term is included in the gradient.

    Parameters
    ----------
    U : ndarray, shape (num_user, lat_dim)
    M : ndarray, shape (num_movie, lat_dim)
    R : ndarray
        Target matrix; must be subtractable from ``np.dot(U, M.T)``.

    Returns
    -------
    (u_grad, m_grad) : tuple of float ndarrays
        ``u_grad`` has the shape of ``U`` and ``m_grad`` the shape of ``M``.
    """
    diff = np.dot(U, M.T) - R

    # u_grad[i] = sum_j diff[i, j] * M[j]  ->  diff . M
    u_grad = np.dot(diff, M).astype(float, copy=False)
    # m_grad[j] = sum_i diff[i, j] * U[i]  ->  diff^T . U
    m_grad = np.dot(diff.T, U).astype(float, copy=False)

    return u_grad, m_grad


def _central_difference(param, evaluate, h):
    """Central-difference gradient of ``evaluate()`` w.r.t. every entry of ``param``.

    ``param`` is perturbed in place one entry at a time and restored before
    moving on, so ``evaluate`` must read ``param`` each time it is called.
    """
    grad = np.zeros(param.shape)
    for idx in np.ndindex(*param.shape):
        original = param[idx]

        param[idx] = original + h
        f_plus = evaluate()

        param[idx] = original - h
        f_minus = evaluate()

        param[idx] = original
        grad[idx] = (f_plus - f_minus) / (2 * h)
    return grad


def compute_num_grad(U, M, R, loss_func, h=1e-5):
    """Numerically estimate the gradient of ``loss_func(U, M, R)`` w.r.t. U and M.

    Uses the central difference ``(f(x + h) - f(x - h)) / (2 * h)`` on each
    entry of ``U`` and then each entry of ``M``.

    Parameters
    ----------
    U, M : array_like
        Factor matrices. They are copied to float arrays, so the caller's
        arrays are never modified and integer inputs are not truncated when
        ``h`` is added.
    R : ndarray
        Passed through to ``loss_func`` unchanged.
    loss_func : callable
        Called as ``loss_func(U, M, R)``; must return a scalar.
    h : float, optional
        Perturbation step size.

    Returns
    -------
    (num_grad_u, num_grad_m) : tuple of float ndarrays
        Shaped like ``U`` and ``M`` respectively.
    """
    # Private float copies: safe to perturb, and `value + h` is representable.
    U_work = np.array(U, dtype=float)
    M_work = np.array(M, dtype=float)

    def evaluate():
        return loss_func(U_work, M_work, R)

    num_grad_u = _central_difference(U_work, evaluate, h)
    num_grad_m = _central_difference(M_work, evaluate, h)

    return num_grad_u, num_grad_m
            

# Small random problem used to sanity-check the analytic gradient.
num_user = 3
num_movie = 3
lat_dim = 4
reg = 0  # defined but not passed below: the check runs on the unregularized loss

R = np.random.rand(num_user, num_movie)
U = np.random.rand(num_user, lat_dim)
M = np.random.randn(num_movie, lat_dim)

# Single-argument print(...) produces the same output on Python 2 and 3.
print(loss(U, M, R))

analytic_grad = compute_grad(U, M, R)
print(analytic_grad)

numeric_grad = compute_num_grad(U, M, R, loss)
print(numeric_grad)

# Fail loudly if the analytic and numerical gradients disagree, instead of
# relying on comparing the printed arrays by eye.
for analytic, numeric in zip(analytic_grad, numeric_grad):
    np.testing.assert_allclose(analytic, numeric, rtol=1e-4, atol=1e-6)
            
        

3.5496908976666295
(array([[ 1.24204969,  0.96135851,  1.10710964,  2.41394578],
       [ 1.12840367,  0.05876261,  1.21410547,  1.68751955],
       [-0.15308083,  1.3135047 , -0.33996154,  0.37211262]]), array([[-1.268399  , -2.16988198, -0.91572637, -1.51955291],
       [ 0.9911696 ,  1.57861042,  0.78144179,  1.15781479],
       [ 1.32614948,  0.81886517,  0.98172185,  0.84078782]]))
(array([[ 1.24204969,  0.96135851,  1.10710964,  2.41394578],
       [ 1.12840367,  0.05876261,  1.21410547,  1.68751955],
       [-0.15308083,  1.3135047 , -0.33996154,  0.37211262]]), array([[-1.268399  , -2.16988198, -0.91572637, -1.51955291],
       [ 0.9911696 ,  1.57861042,  0.78144179,  1.15781479],
       [ 1.32614948,  0.81886517,  0.98172185,  0.84078782]]))


In [47]:
# Scratch check of the gradient contraction on a non-square problem.
num_user = 3
num_movie = 6
lat_dim = 2
U = np.random.randn(num_user, lat_dim)
M = np.random.randn(num_movie, lat_dim)
diff = np.random.randn(num_user, num_movie)

# grad_u[i] = sum_j diff[i, j] * M[j]  ->  shape (num_user, lat_dim)
grad_u = np.dot(diff, M)
# grad_m[j] = sum_i diff[i, j] * U[i]  ->  shape (num_movie, lat_dim)
grad_m = np.dot(diff.T, U)

# Single-argument print(...) produces the same output on Python 2 and 3.
print(grad_u)
print(grad_m)


[[ 0.66028601 -6.06697783]
 [ 1.45826095 -0.48272723]
 [ 0.76705498  1.47248015]]
[[ 2.16512191 -0.14728376]
 [-2.44554935  0.48069103]
 [-0.35991685 -2.06944839]
 [ 3.34090557  1.02299273]
 [ 1.25544474 -1.62462961]
 [-2.17379815  1.23300123]]
