In [1]:
# Benchmark
# -  Collaborative Filtering User-based
# -  Collaborative Filtering Item-based

def namestr(obj):
    """Return the names of all module-level globals bound to ``obj``.

    Matching is by identity (``is``), not equality, so several names are
    returned when they all alias the same object, and an empty list is
    returned when no global refers to ``obj``.
    """
    matches = []
    for label, value in globals().items():
        if value is obj:
            matches.append(label)
    return matches

def pprint(obj):
    """Print the global name(s) bound to ``obj``, then ``obj`` itself.

    The first printed line is the list returned by ``namestr(obj)``; the
    second is the plain ``print`` rendering of the object.
    """
    labels = namestr(obj)
    print(labels)
    print(obj)

In [2]:
import numpy as np
from movieLensAnalyzer import MovieLensAnalyzer 
import random 

# Seed the global RNGs so the train mask (and every metric derived from it)
# is the same on each Restart & Run All.
# NOTE(review): assumes MovieLensAnalyzer draws from the `random` / `np.random`
# global state -- confirm against its implementation.
randomSeed = 0
random.seed(randomSeed)
np.random.seed(randomSeed)

numElements = 20        # size the rating matrix is reduced to
percentageZero = 0.3    # argument to generateMask (presumably the fraction hidden -- confirm)
positiveThreshold = 3   # ratings >= this are labeled positive (1), below it negative (0)

movieLensAnalyzer = MovieLensAnalyzer()
# Make the matrix small to only numElements
movieLensAnalyzer.simplifyMatrix(numElements)
movieLensAnalyzer.generateMask(percentageZero)
movieLensAnalyzer.generatePositiveNegatives(positiveThreshold)

# Both getters return numpy arrays. The matrices are fetched once, after the
# mask and the positive/negative labels have been generated.
userMovieRatingMatrix = movieLensAnalyzer.getUserMovieRatingMatrix()
trainMatrix = movieLensAnalyzer.getMaskedUserMovieRatingMatrix()
r = trainMatrix                 # masked ratings: input to the CF cell below
rtest = userMovieRatingMatrix   # unmasked ratings: reference for the RMSE

print(userMovieRatingMatrix.shape)

(20, 20)
(20, 20)


In [3]:
# Collaborative Filtering
import numpy as np

# Item-based similarity: entry (i, j) is the dot product of the rating
# columns of items i and j in the training matrix.
itemSimilarity = np.dot(np.transpose(r), r)
pprint(userMovieRatingMatrix)

# Note: without normalization, item-based and user-based CF give the same
# reconstruction, because r (r^T r) == (r r^T) r.

# Normalize each COLUMN of the similarity matrix to sum to 1.
# The reconstruction below computes
#     prediction[u, j] = sum_i r[u, i] * itemSimilarity[i, j]
# so the weights applied to user u's ratings for target item j are column j.
# With non-negative ratings, unit-sum columns make every prediction a
# weighted average of that user's ratings, which keeps it inside the range of
# the ratings. Row normalization does not give that guarantee for this
# product (it is the right choice for `np.dot(userSimilarity, r)` instead).
column_sums = itemSimilarity.sum(axis=0)
column_sums[column_sums == 0] = 1  # avoid 0/0; such a column stays all zero
itemSimilarity = itemSimilarity / column_sums[np.newaxis, :]

reconstructionItemSimilarity = np.dot(r, itemSimilarity)
pprint(reconstructionItemSimilarity)

# User-based similarity: entry (u, v) is the dot product of the rating rows
# of users u and v in the training matrix.
userSimilarity = r.dot(r.T)

# Scale every row to sum to 1 so the weights that row u applies to the users'
# ratings add up to one; rows that sum to zero are divided by 1 instead.
row_sums = np.sum(userSimilarity, axis=1)
row_sums[row_sums == 0] = 1
userSimilarity = userSimilarity / row_sums.reshape(-1, 1)

# prediction[u, j] = sum_v userSimilarity[u, v] * r[v, j]
reconstructionUserSimilarity = userSimilarity.dot(r)

pprint(reconstructionUserSimilarity)

pprint(rtest)

# RMSE of each reconstruction against rtest, averaged over EVERY entry of the
# matrix (zero entries of rtest are included in the mean).
# NOTE(review): movieLensAnalyzer.rootMeanSquareError(...) used to be called
# here, but its results were overwritten by the expressions below before being
# read. If that metric is the intended one, compute it under a separate name
# and report it alongside these.
rmseItemSimilarity = np.sqrt(np.mean((reconstructionItemSimilarity - rtest) ** 2))
rmseUserSimilarity = np.sqrt(np.mean((reconstructionUserSimilarity - rtest) ** 2))

pprint(rmseUserSimilarity)
pprint(rmseItemSimilarity)


['rtest', 'userMovieRatingMatrix']
[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  4.  0.  0.  0.  0.
   0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  5.  0.  0.  0.  0.
   0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.]
 [ 0.  0.  0.  0.  0.  4.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.]
 [ 3.  0.  3.  0.  0.  5.  0.  0.  0.  0.  0.  5.  3.  5.  0.  3.  2.  3.
   4.  4.]
 [ 0.  0.  0.  0.  0.  3.  0.  0.  0.  0.  0.  0.  0.  5.  0.  0.  0.  0.
   0.  0.]
 [ 0.  0.  0.  0.  0.  5.  0.  0.  0.  0.  5.  0.  0.  5.  0.  0.  0.  0.
   1.  4.]
 [ 5.  4.  5.  4.  5.  5.  0.  4.  5.  5.  4.  0.  0.  5.  4.  0.  0.  2.
   5.  3.]
 [ 0.  0.  0.  0.  0.  4.  0.  0.  5.  5.  4.  0.  0.  4.  0.  0.  0.  0.
   4.  4.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.]
 [ 0.  0.  2.  0.  5.  5.  0. 