In [1]:
import numpy as np

In [2]:
user_item_matrix = np.array([[5, 3, 0, 2, 1], [4, 0, 0, 2, 1], [1, 3, 1, 0, 5], [1, 0, 2, 0, 4]])

1. Singular Value Decomposition (SVD)

In [3]:
import scipy

U, S, Vh = scipy.linalg.svd(user_item_matrix, full_matrices=False)

user_matrix = np.dot(U, np.diag(S))

item_matrix = Vh

# Print the learned user and item matrices
print('Learned User Matrix:')
print(user_matrix)

print('Learned Item Matrix:')
print(item_matrix)

Learned User Matrix:
[[ 5.43202282 -2.91659222 -0.93896115 -0.32399058]
 [ 3.60310831 -2.38145478  1.47881379  0.39924103]
 [ 4.99132244  3.11531436 -1.13793608  0.29430966]
 [ 3.34336945  2.65423747  1.63067173 -0.34324022]]
Learned Item Matrix:
[[ 0.63510826  0.39793542  0.14861237  0.22995808  0.60275911]
 [-0.59296528  0.01927595  0.27236776 -0.34260524  0.67561681]
 [ 0.24395563 -0.88724353  0.30237083  0.15374882  0.19549361]
 [-0.15341309 -0.18993951 -0.83655012  0.32103755  0.37081828]]


In [4]:
np.dot(user_matrix, item_matrix)

array([[ 5.00000000e+00,  3.00000000e+00,  4.85235125e-16,
         2.00000000e+00,  1.00000000e+00],
       [ 4.00000000e+00,  9.37725527e-17, -1.07408510e-15,
         2.00000000e+00,  1.00000000e+00],
       [ 1.00000000e+00,  3.00000000e+00,  1.00000000e+00,
         4.23722441e-16,  5.00000000e+00],
       [ 1.00000000e+00, -1.12161970e-15,  2.00000000e+00,
         6.28414357e-16,  4.00000000e+00]])

2. Non-Negative Matrix Factorization (NMF)

In [5]:
from sklearn.decomposition import NMF

nmf = NMF(n_components=6)

user_matrix = nmf.fit_transform(user_item_matrix)

item_matrix = nmf.components_

# Print the learned user and item matrices
print('Learned User Matrix:')
print(user_matrix)

print('Learned Item Matrix:')
print(item_matrix)

Learned User Matrix:
[[1.50046562e+00 0.00000000e+00 1.45859014e-01 4.58685632e-01
  5.75047598e-01 7.99415173e-01]
 [1.66229226e+00 0.00000000e+00 6.51330590e-06 5.54628919e-06
  1.22132381e+00 1.57613648e+00]
 [6.53666143e-01 1.16314609e+00 2.83316559e+00 0.00000000e+00
  5.47326516e-04 0.00000000e+00]
 [0.00000000e+00 2.32420747e+00 0.00000000e+00 0.00000000e+00
  1.10596091e+00 0.00000000e+00]]
Learned Item Matrix:
[[1.46163786e+00 0.00000000e+00 0.00000000e+00 4.86432760e-03
  5.42742565e-01]
 [3.01702343e-03 0.00000000e+00 8.60260254e-01 0.00000000e+00
  1.72105819e+00]
 [1.43228395e-02 1.05888755e+00 0.00000000e+00 0.00000000e+00
  9.33013671e-01]
 [4.46528407e+00 6.20370796e+00 0.00000000e+00 2.14265152e+00
  0.00000000e+00]
 [8.97830701e-01 0.00000000e+00 2.09317267e-04 7.99341887e-04
  0.00000000e+00]
 [3.00591636e-01 0.00000000e+00 0.00000000e+00 1.26307687e+00
  6.20339056e-02]]


In [6]:
np.dot(user_matrix, item_matrix)

array([[4.99998103e+00, 3.00000000e+00, 1.20367392e-04, 2.00028470e+00,
        1.00004586e+00],
       [4.00000962e+00, 4.13044169e-05, 2.55644163e-04, 1.99985560e+00,
        9.99976741e-01],
       [1.00000281e+00, 3.00000377e+00, 1.00060846e+00, 3.18008376e-03,
        4.99999676e+00],
       [9.99977850e-01, 0.00000000e+00, 1.99965480e+00, 8.84040883e-04,
        4.00009629e+00]])

3. Gradient Descent

In [7]:
num_users, num_items = user_item_matrix.shape

# Set the number of latent features (factors)
num_factors = 6

# Set the learning rate and regularization parameter
learning_rate = 0.01
regularization = 0.1

# Initialize user and item matrices with random values
user_matrix = np.random.random((num_users, num_factors))
item_matrix = np.random.random((num_factors, num_items))

# Define the loss function (Mean Squared Error)
def loss(user_matrix, item_matrix, user_item_matrix, regularization):
    predicted_ratings = np.dot(user_matrix, item_matrix)
    mse = np.sum((user_item_matrix - predicted_ratings) ** 2) / np.sum(user_item_matrix != 0)
    reg_term = 0.5 * regularization * (np.sum(user_matrix ** 2) + np.sum(item_matrix ** 2))
    total_loss = mse + reg_term
    return total_loss

# Perform gradient descent to minimize the loss
num_iterations = 1000

for i in range(num_iterations):
    # Compute predicted ratings and error
    predicted_ratings = np.dot(user_matrix, item_matrix)
    error = user_item_matrix - predicted_ratings

    # Update user and item matrices using gradient descent
    user_matrix -= learning_rate * (-2 * np.dot(error, item_matrix.T) + 2 * regularization * user_matrix)
    item_matrix -= learning_rate * (-2 * np.dot(user_matrix.T, error) + 2 * regularization * item_matrix)

    # Compute and print the loss
    current_loss = loss(user_matrix, item_matrix, user_item_matrix, regularization)
    if i % 100 == 0:
        print(f'Iteration {i}, Loss: {current_loss}')

# Print the learned user and item matrices
print('Learned User Matrix:')
print(user_matrix)

print('Learned Item Matrix:')
print(item_matrix)

Iteration 0, Loss: 4.876967451629531
Iteration 100, Loss: 1.755121429415912
Iteration 200, Loss: 1.751467938862448
Iteration 300, Loss: 1.746634235632274
Iteration 400, Loss: 1.7429525416012415
Iteration 500, Loss: 1.7410169355743486
Iteration 600, Loss: 1.7400430058447651
Iteration 700, Loss: 1.7395436963721427
Iteration 800, Loss: 1.7392787589967538
Iteration 900, Loss: 1.7391325807219917
Learned User Matrix:
[[ 0.67479719  1.22182937  0.54187496  0.84939027 -0.08366531  1.51050801]
 [ 1.15663578  1.25573014  0.47612732 -0.1482253   0.47401612  0.22807891]
 [-0.55181591  0.08820665  1.27951717  1.5644909   0.81352377 -0.00179925]
 [ 0.39076422 -0.46849813  0.74162703  0.33175222  1.59962412 -0.00167432]]
Learned Item Matrix:
[[ 1.27600512 -0.50489691  0.17073203  0.59536369 -0.07165627]
 [ 1.42842279  0.39246052 -0.56763311  0.86721117 -0.08949339]
 [ 0.59327134  0.59266464  0.20284271  0.18480065  1.38485241]
 [ 0.33846081  1.35459745  0.12831997 -0.00787138  1.17250867]
 [ 0.376279

In [8]:
np.dot(user_matrix, item_matrix)

array([[ 4.93136980e+00,  2.93620193e+00, -2.36937131e-02,
         1.98857934e+00,  1.02297084e+00],
       [ 3.94412530e+00,  4.52375504e-02,  3.75294905e-02,
         1.94868498e+00,  9.71900221e-01],
       [ 1.01452726e+00,  2.94657978e+00,  1.02531686e+00,
        -9.55761203e-04,  4.92067028e+00],
       [ 9.81644315e-01,  2.91457346e-02,  1.92085391e+00,
         1.43137412e-02,  3.95158190e+00]])