# Recommendation systems and matrix factorization: a simple problem

In [1]:
import numpy as np


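Goal: predict the missing entries (stored as 0) of the rating matrix R by factorizing it into a user-feature matrix P and an item-feature matrix Q, so that R ≈ P · Qᵀ.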

In [2]:

def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    '''
    Factorize a rating matrix as R ~ P . Q^T with stochastic gradient descent.

    Only entries with R[i][j] > 0 are treated as observed ratings; every
    other entry is ignored while training. The P and Q passed in are not
    modified: floating-point copies are trained and returned.

    R: rating matrix (|U| * |D|)
    P: |U| * K (User features matrix, initial values)
    Q: |D| * K (Item features matrix, initial values)
    K: latent features
    steps: maximum number of passes over the observed ratings
    alpha: learning rate
    beta: regularization parameter

    Returns the pair (P, Q) of trained user and item feature matrices,
    with the same shapes as the inputs.
    '''
    R = np.asarray(R)
    # Train on float copies so the caller's arrays stay untouched and
    # integer-typed inputs do not truncate the fractional gradient steps.
    P = np.array(P, dtype=float)
    Q = np.array(Q, dtype=float).T

    # The set of observed ratings never changes, so locate it once
    # (row-major order, i.e. the same order as a nested i/j loop).
    observed_mask = R > 0
    observed = [(int(i), int(j)) for i, j in np.argwhere(observed_mask)]
    if not observed:
        # Nothing to fit: the objective is 0 and no update would happen.
        return P, Q.T

    for _ in range(steps):
        for i, j in observed:
            # calculate error
            eij = R[i, j] - np.dot(P[i, :], Q[:, j])

            # Gradient step on the first K features. P is updated first and
            # the Q update then uses the refreshed P values; each feature is
            # independent, so the slices act element by element.
            P[i, :K] += alpha * (2 * eij * Q[:K, j] - beta * P[i, :K])
            Q[:K, j] += alpha * (2 * eij * P[i, :K] - beta * Q[:K, j])

        # Regularized squared error over the observed entries only.
        residual = (R - np.dot(P, Q))[observed_mask]
        row_norms = np.sum(P[:, :K] ** 2, axis=1)
        col_norms = np.sum(Q[:K, :] ** 2, axis=0)
        penalty = np.sum(observed_mask * (row_norms[:, None] + col_norms[None, :]))
        e = np.sum(residual ** 2) + (beta / 2) * penalty

        # Stop early once the objective falls below the 0.001 threshold.
        if e < 0.001:
            break

    return P, Q.T

In [6]:
# Toy rating matrix: one row per user, one column per movie.
# A 0 marks a rating that is missing and is to be predicted.
R = np.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
    [2, 1, 3, 0],
])

# N: num of User
N = len(R)
# M: num of Movie
M = len(R[0])
# Num of Features
K = 5

print(N, ' users, ', M, ' movies, ', K ,' latent features. \n')

# Seeded local generator so the random initialization, and therefore the
# factorization printed below, is identical on every Restart & Run All.
rng = np.random.RandomState(42)
P = rng.rand(N, K)
Q = rng.rand(M, K)

nP, nQ = matrix_factorization(R, P, Q, K)

print(nP,'\n',nQ)

# Reconstructed rating matrix, including predictions for the missing entries.
nR = np.dot(nP, nQ.T)

6  users,  4  movies,  5  latent features. 

[[ 1.53656129  1.24614958 -0.10221173  1.00163894  0.41025439]
 [ 1.33376243  0.92530544  0.04562848  0.76676008  0.35558781]
 [ 0.15725308  0.43873174  1.93049583  0.56083617  0.55930663]
 [-0.05871996  0.15187616  1.31077021  0.87664054  0.6603998 ]
 [ 0.1158897   0.94437145  1.08147067  0.70980613  1.00155184]
 [ 0.2900646   0.50898561  0.48712224  0.7161552   0.54106564]] 
 [[ 1.33511827  1.28726029 -0.38340059  0.9487346   0.82408923]
 [ 1.18804757  0.69868631  0.16694487  0.32945993 -0.11093618]
 [ 0.53556405  1.63883002  1.27608172  1.08432395  1.17501228]
 [-0.27187323  0.41486887  2.0080886   0.6945079   0.99474551]]


In [4]:
# Show the original ratings, then the reconstructed matrix, one after the other.
print(R, nR, sep='\n')

[[5 3 0 1]
 [4 0 0 1]
 [1 1 0 5]
 [1 0 0 4]
 [0 1 5 4]
 [2 1 3 0]]
[[4.99066211 2.95094562 3.57483161 1.00141365]
 [3.97622203 2.07457151 3.48623887 0.99842713]
 [1.03849821 0.92565163 3.84873891 4.96856383]
 [0.99128311 0.57169908 3.48270744 3.97837047]
 [2.52536799 1.07708425 4.949422   4.00483039]
 [1.95470206 1.00878967 3.0218246  2.38007949]]
