In [1]:
import csv

In [10]:
# Build a lookup from the first column of Books.csv to its second column
# (presumably book id -> title; confirm against the file's header).
# A header row, if the file has one, is stored like any other row.
# newline='' is what the csv module expects, so that line breaks embedded in
# quoted fields are parsed as part of the field instead of splitting the row.
with open('Books.csv', newline='') as file:
    reader = csv.reader(file, delimiter=',')
    titles = {}
    for line in reader:
        if len(line) < 2:
            # Blank or truncated row: there is no key/value pair to record.
            continue
        titles[line[0]] = line[1]

In [14]:
# Group the rows of Ratings.csv by their first column (presumably the user
# id; confirm against the file's header). Each entry is a
# (title, rating) tuple, where the title is looked up in `titles` via the
# second column and the rating is kept as the raw string from the third.
with open('Ratings.csv', newline='') as file:
    reader = csv.reader(file, delimiter=',')
    data = {}
    for line in reader:
        try:
            entry = (titles[line[1]], line[2])
        except (KeyError, IndexError):
            # Best-effort load: skip rows that reference a key missing from
            # `titles` (KeyError) or that have fewer than three columns
            # (IndexError). Anything else is a real problem and propagates.
            continue
        data.setdefault(line[0], []).append(entry)

The matrix factorization code below is courtesy of [Denise Chen](https://towardsdatascience.com/recommendation-system-matrix-factorization-d61978660b4b).

In [1]:
import numpy

def matrix_factorization(R, P, Q, K, steps=1000, alpha=0.001, beta=0.02):
    '''
    Factorize R into two low-rank matrices so that numpy.dot(P, Q.T)
    approximates R on its known cells, using per-cell gradient updates.

    Only cells with R[i][j] > 0 are fitted; every other cell is treated as
    unknown and skipped.

    R: rating matrix, |U| * |D| (numpy array or list of lists)
    P: |U| * K (User features matrix), initial values
    Q: |D| * K (Item features matrix), initial values
    K: latent features
    steps: maximum number of iterations
    alpha: learning rate
    beta: regularization parameter

    Returns (P, Q): the fitted |U| * K and |D| * K float matrices.
    The P and Q passed in are not modified.
    '''
    # Work on float copies: the caller's arrays stay untouched, and integer
    # or list inputs do not break or truncate the in-place updates below.
    P = numpy.array(P, dtype=float)
    Q = numpy.array(Q, dtype=float).T  # K * |D|, so Q[:, j] holds item j's features

    # R never changes during training, so collect the known cells once
    # (row-major order) instead of rescanning the matrix on every step.
    observed = [
        (i, j, R[i][j])
        for i in range(len(R))
        for j in range(len(R[i]))
        if R[i][j] > 0
    ]

    for step in range(steps):
        for i, j, rating in observed:
            # Error of the current prediction for this cell.
            eij = rating - numpy.dot(P[i, :], Q[:, j])
            for k in range(K):
                # Gradient step with learning rate alpha and regularization beta.
                # Note: Q's update sees the already-updated P[i][k] (sequential
                # rather than simultaneous update); kept so results are unchanged.
                P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
                Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - beta * Q[k][j])

        # Regularized squared error over the known cells.
        e = 0
        for i, j, rating in observed:
            e = e + pow(rating - numpy.dot(P[i, :], Q[:, j]), 2)
            for k in range(K):
                e = e + (beta / 2) * (pow(P[i][k], 2) + pow(Q[k][j], 2))

        # Stop early once the cost is effectively zero.
        if e < 0.001:
            break

    return P, Q.T

In [6]:
# Toy rating matrix: one row per user, one column per item.
# A 0 marks an unknown rating (matrix_factorization skips cells that are not > 0).
R = numpy.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
    [2, 1, 3, 0],
])

N = len(R)     # number of users (rows)
M = len(R[0])  # number of items (columns)
K = 3          # number of latent features

# Seed NumPy's global RNG so the random starting factors, and therefore the
# fitted result, are the same on every Restart & Run All.
SEED = 0
numpy.random.seed(SEED)
P = numpy.random.rand(N, K)
Q = numpy.random.rand(M, K)

print(P)
nP, nQ = matrix_factorization(R, P, Q, K)

# Reconstructed ratings; .T is the transpose, so this is an N x M matrix.
nR = numpy.dot(nP, nQ.T)

[[0.6331963  0.02821366 0.79406594]
 [0.59718642 0.69803112 0.23586436]
 [0.90730732 0.11558956 0.37643335]
 [0.64878761 0.54341905 0.32792832]
 [0.78073867 0.44367831 0.6083233 ]
 [0.05791396 0.32403569 0.87369434]]


In [7]:
# Show the fitted factors: the user-feature matrix first, then the
# item-feature matrix transposed (one row per latent feature).
for learned in (nP, nQ.T):
    print(learned)

[[ 1.92599662 -0.25657616  1.29442948]
 [ 1.78505984  0.00634273  0.49005629]
 [ 0.5229933   1.93069177  0.77689252]
 [ 0.48903087  1.54604156  0.58084758]
 [ 0.63795735  1.40117534  1.08197632]
 [ 0.56111109  0.4559302   1.05738178]]
[[ 1.98521307  1.13028526  0.9899724   0.37125736]
 [-0.32088427 -0.10006231  1.89974239  2.20516894]
 [ 0.86314569  0.53371398  1.50465952  0.65554288]]


In [8]:
# Display the reconstructed rating matrix computed as numpy.dot(nP, nQ.T).
print(nR)

[[5.02312615 2.89345429 3.36693053 0.99780066]
 [3.96467883 2.27854204 2.51657739 0.99795631]
 [1.08929596 0.81258054 5.35452467 4.96095301]
 [0.97608615 0.70805036 4.29518558 3.97160963]
 [1.75076933 1.15833483 4.92143832 4.03595657]
 [1.88029876 1.1529336  3.01263399 1.90687884]]
