In [103]:
import numpy as np                 

In [104]:
r = np.array([[5, 3, 0, 1],
              [4, 0, 0, 1],
              [1, 1, 0, 5],
              [1, 0, 0, 4],
              [0, 1, 5, 4]])

num_row_r = r.shape[0]
num_col_r = r.shape[1]
print(f'The {num_row_r}X{num_col_r} matrix is:')
print('R=',r)

k = 2    # Number of latent factors/features (hyperparameter)
# Fill p and q with random floats between 0 and 1
p = np.random.rand(num_row_r, k)
q = np.random.rand(num_col_r, k)    # easier to leave in this shape and transpose later
print('\nP Matrix:')
print(p)
print('\nQ Matrix:')
print(q.T)    # q is transposed


The 5X4 matrix is:
R= [[5 3 0 1]
 [4 0 0 1]
 [1 1 0 5]
 [1 0 0 4]
 [0 1 5 4]]

P Matrix:
[[0.14455814 0.9670023 ]
 [0.56851564 0.19724023]
 [0.5987839  0.24759262]
 [0.56006938 0.86578416]
 [0.83100717 0.52536886]]

Q Matrix:
[[0.03363222 0.81248024 0.41544704 0.5589154 ]
 [0.19086376 0.96668986 0.9240358  0.55881517]]


In [116]:
# Hyper parameters
steps = 5000     # epochs
alpha = 0.0002   # learning rate
beta = 0.02      # for regularization to avoid over fitting 
def matrix_factorization(r, p, q, k, steps, alpha, beta):
    
    samples = [
        (i, j, r[i,j])
        for i in range(num_row_r)
        for j in range(num_col_r)
        if r[i,j] > 0
    ]

    # Main gradient descent loop (repeats for number of steps)
    for i in range(steps):

        # Stochastic Gradient Descent 
        for i, j, rating in samples:    # for every rating with a non-zero value

            # Get prediction and error
            prediction = np.matmul(p[i, :], q[j, :])
            e = (rating - prediction)

            # Update p and q matrices
            p[i, :] += alpha * (e * q[j, :] - beta * p[i, :])    
            q[j, :] += alpha * (e * p[i, :] - beta * q[j, :])    
            
    return p, q.T    # q is transposed
[new_p, new_q] = matrix_factorization(r, p, q, k, steps, alpha, beta)
print('New P Matrix:')
print(new_p)
print('New Q Matrix:')
print(new_q)


New P Matrix:
[[ 1.8068469   0.46846776]
 [ 2.36709944  0.45786076]
 [ 0.19477121  2.03125401]
 [ 0.25181278  1.67642267]
 [ 1.12972185  1.73042761]
 [ 2.66854292 -0.26943813]]
New Q Matrix:
[[ 1.99968315  1.69003273  2.87640868 -0.04547614  0.58239146]
 [ 0.31422612  0.20726426  0.57290299  2.37973344  1.90844112]]


In [108]:
r_predicted = np.matmul(new_p, new_q)
print('Predicted Ratings')
print(r_predicted.round(4))
print('Actual Ratings')
print(r)

Predicted Ratings
[[5.0726 2.7647 4.3762 0.9621]
 [3.8243 2.0915 3.5388 1.0521]
 [1.0855 0.695  4.3981 4.9258]
 [1.0154 0.6346 3.5951 3.9001]
 [2.5016 1.4424 4.8039 4.082 ]]
Actual Ratings
[[5 3 0 1]
 [4 0 0 1]
 [1 1 0 5]
 [1 0 0 4]
 [0 1 5 4]]


In [110]:
r= np.array([[4, 3, 0, 1, 2],
              [5, 0, 0, 1, 0],
              [1, 2, 1, 5, 4],
              [1, 0, 0, 4, 0],
              [0, 1, 5, 4, 0],
              [5, 5, 0, 0, 1]])

num_row_r = r.shape[0]
num_col_r = r.shape[1]
print(f'The {num_row_r}X{num_col_r} matrix is:')
print('R=',r)

k = 2    # Number of latent factors/features (hyperparameter)
# Fill p and q with random floats between 0 and 1
p = np.random.rand(num_row_r, k)
q = np.random.rand(num_col_r, k)    # easier to leave in this shape and transpose later
print('\nP Matrix:')
print(p)
print('\nQ Matrix:')
print(q.T)    # q is transposed


The 6X5 matrix is:
R= [[4 3 0 1 2]
 [5 0 0 1 0]
 [1 2 1 5 4]
 [1 0 0 4 0]
 [0 1 5 4 0]
 [5 5 0 0 1]]

P Matrix:
[[0.7750575  0.23778335]
 [0.05837291 0.13415622]
 [0.76998539 0.51224262]
 [0.36788918 0.69062972]
 [0.56368709 0.17415404]
 [0.4551278  0.20035173]]

Q Matrix:
[[0.57930189 0.35119636 0.21178454 0.2160828  0.11848607]
 [0.32884069 0.19917857 0.55829582 0.42270987 0.08771619]]


In [112]:
# Hyper parameters
steps = 10000     # epochs
alpha = 0.0002   # learning rate
beta = 0.02      # for regularization to avoid over fitting and prevent extreme values

In [117]:
# Hyper parameters
steps = 10000     # epochs
alpha = 0.0002   # learning rate
beta = 0.02      # for regularization to avoid over fitting and prevent extreme values
def matrix_factorization(r, p, q, k, steps, alpha, beta):
    # List of training samples (non-zero values from ratings matrix)
    # i,j are the coordinates of r
    # r[i,j] is the rating at the corresonding coordinates
    samples = [
        (i, j, r[i,j])
        for i in range(num_row_r)
        for j in range(num_col_r)
        if r[i,j] > 0
    ]

    # Main gradient descent loop (repeats for number of steps)
    for i in range(steps):

        # Stochastic Gradient Descent 
        for i, j, rating in samples:    # for every rating with a non-zero value

            # Get prediction and error
            prediction = np.matmul(p[i, :], q[j, :])
            e = (rating - prediction)

            # Update p and q matrices
            p[i, :] += alpha * (e * q[j, :] - beta * p[i, :])    
            q[j, :] += alpha * (e * p[i, :] - beta * q[j, :])   
            
    return p, q.T    # q is transposed
[new_p, new_q] = matrix_factorization(r, p, q, k, steps, alpha, beta)
print('New P Matrix:')
print(new_p)
print('New Q Matrix:')
print(new_q)


New P Matrix:
[[ 1.90883856  0.48730693]
 [ 2.51479318  0.48935692]
 [ 0.16226417  2.07679546]
 [ 0.20572735  1.66669533]
 [ 1.08580666  1.67401262]
 [ 2.8727106  -0.27667221]]
New Q Matrix:
[[ 1.86420702  1.58623156  3.37645907 -0.06463506  0.54171075]
 [ 0.37526955  0.29378167  0.45384668  2.3903566   1.8712632 ]]


In [115]:
r_predicted = np.matmul(new_p, new_q)
print('Predicted Ratings')
print(r_predicted.round(4))
print('Actual Ratings')
print(r)

Predicted Ratings
[[ 3.7527  3.2036  4.0292  1.0857  1.8626]
 [ 4.9097  4.2013  5.1242  0.9105  2.0097]
 [ 0.9394  0.719   2.226   4.4881  3.9985]
 [ 1.0509  0.8248  2.1891  3.978   3.5995]
 [ 2.6402  2.1829  3.8757  4.3692  4.3309]
 [ 5.2347  4.5044  5.0959 -0.3019  1.0766]]
Actual Ratings
[[4 3 0 1 2]
 [5 0 0 1 0]
 [1 2 1 5 4]
 [1 0 0 4 0]
 [0 1 5 4 0]
 [5 5 0 0 1]]
