In [1]:
import numpy as np

In [2]:
# Matrix R
R = np.array([[4, np.NaN, np.NaN, 2, np.NaN],
              [np.NaN, 5, np.NaN, 3, 1],
              [np.NaN, np.NaN, 3, 4, 4],
              [5, 2, 1, 2, np.NaN]
             ])

num_users, num_items = R.shape
K=3 # Latent Factor dim


# Random P and Q Matrix
np.random.seed(1)
P = np.random.normal(scale=1./K, size=(num_users, K))
Q = np.random.normal(scale=1./K, size=(num_items, K))

In [3]:
non_zeros = [(i, j, R[i, j]) for i in range(num_users) for j in range(num_items) if R[i, j] >0]
non_zeros

[(0, 0, 4.0),
 (0, 3, 2.0),
 (1, 1, 5.0),
 (1, 3, 3.0),
 (1, 4, 1.0),
 (2, 2, 3.0),
 (2, 3, 4.0),
 (2, 4, 4.0),
 (3, 0, 5.0),
 (3, 1, 2.0),
 (3, 2, 1.0),
 (3, 3, 2.0)]

In [4]:
from sklearn.metrics import mean_squared_error

def get_rmse(R, P, Q, non_zeros):
    error = 0
    
    pred_matrix = np.dot(P, Q.T)
    
    # RMSE between R and R_hat if R[i, j] is not Nan
    x_non_zero_ind = [non_zero[0] for non_zero in non_zeros]
    y_non_zero_ind = [non_zero[1] for non_zero in non_zeros]
    R_non_zeros = R[x_non_zero_ind, y_non_zero_ind] # can extract from non_zero[2]

    pred_matrix_non_zeros = pred_matrix[x_non_zero_ind, y_non_zero_ind]

    mse = mean_squared_error(R_non_zeros, pred_matrix_non_zeros)
    rmse = np.sqrt(mse)
    
    return rmse

In [5]:
# SGD

steps = 1000
learning_rate = 0.01
r_lambda = 0.01

for step in range(steps):
    for i, j, r in non_zeros:
        # error
        eij = r - np.dot(P[i, :], Q[j, :].T)
        
        # SDG with L2 Regularization
        P[i, :] = P[i, :] + learning_rate * (eij * Q[j, :] - r_lambda * P[i, :])
        Q[j, :] = Q[j, :] + learning_rate * (eij * P[i, :] - r_lambda * Q[j, :])
        
        #rmse
    rmse = get_rmse(R, P, Q, non_zeros)
        
    if (step % 50) == 0:
        print("Iteration Step: ", step, " rmse: ", rmse )

Iteration Step:  0  rmse:  3.2388050277987723
Iteration Step:  50  rmse:  0.4876723101369648
Iteration Step:  100  rmse:  0.1564340384819247
Iteration Step:  150  rmse:  0.07455141311978046
Iteration Step:  200  rmse:  0.04325226798579314
Iteration Step:  250  rmse:  0.029248328780878973
Iteration Step:  300  rmse:  0.022621116143829466
Iteration Step:  350  rmse:  0.019493636196525135
Iteration Step:  400  rmse:  0.018022719092132704
Iteration Step:  450  rmse:  0.01731968595344266
Iteration Step:  500  rmse:  0.016973657887570753
Iteration Step:  550  rmse:  0.016796804595895633
Iteration Step:  600  rmse:  0.01670132290188466
Iteration Step:  650  rmse:  0.01664473691247669
Iteration Step:  700  rmse:  0.016605910068210026
Iteration Step:  750  rmse:  0.016574200475705
Iteration Step:  800  rmse:  0.01654431582921597
Iteration Step:  850  rmse:  0.01651375177473524
Iteration Step:  900  rmse:  0.01648146573819501
Iteration Step:  950  rmse:  0.016447171683479155


In [6]:
pred_matrix = np.dot(P, Q.T)
print(np.round(pred_matrix, 3))

[[3.991 0.897 1.306 2.002 1.663]
 [6.696 4.978 0.979 2.981 1.003]
 [6.677 0.391 2.987 3.977 3.986]
 [4.968 2.005 1.006 2.017 1.14 ]]


NameError: name 'ratings' is not defined