# Recommender Systems

## 1. Movie Ratings dataset

In [1]:
import scipy.io

In [112]:
mat = scipy.io.loadmat('ex8_movies.mat')

In [113]:
mat.keys()

dict_keys(['__header__', '__version__', '__globals__', 'Y', 'R'])

In [179]:
Y = mat['Y'] #row: movies, col: people
Y.shape

(1682, 943)

In [178]:
R = mat['R']
R.shape

(1682, 943)

In [116]:
import numpy as np

In [117]:
index = np.argwhere(R[0]==1)
av = np.mean(Y[0, index])
print('average rating for movie 1 (toy story):',av)

average rating for movie 1 (toy story): 3.8783185840707963


In [118]:
y = np.mat(Y)

In [84]:
import cv2

In [10]:
ys = cv2.resize(y, (540,480))
ys = ys/np.max(y)
cv2.imshow('image',ys)
cv2.waitKey(0)
cv2.destroyAllWindows()

## 2. Collaborative filtering learning algorithm

### 2.1 Collaborative filtering cost function

In [119]:
mat2 = scipy.io.loadmat('ex8_movieParams.mat')

In [120]:
mat2.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'Theta', 'num_users', 'num_movies', 'num_features'])

In [180]:
X = mat2['X']
Theta = mat2['Theta']
num_users = mat2['num_users'][0][0]
num_movies = mat2['num_movies'][0][0]
num_features = mat2['num_features'][0][0]
print('X:',X.shape)
print('Theta:',Theta.shape)
print('num_users:',num_users)
print('num_movies:',num_movies)
print('num_features:',num_features)

X: (1682, 10)
Theta: (943, 10)
num_users: 943
num_movies: 1682
num_features: 10


In [181]:
param = np.concatenate((X,Theta), axis = None)
param.shape

(26250,)

In [265]:
def cofiCostFunc(param, Y, R, num_users, num_movies, num_features, Lambda):
    X = param[:num_movies*num_features].reshape(num_movies, num_features)
    Theta = param[num_movies*num_features:].reshape(num_users, num_features)
    diff = np.dot(X,Theta.transpose()) - Y
    J = np.sum( np.square( diff*R ))*0.5 + ( np.sum(np.square(Theta)) + np.sum(np.square(X)) )*0.5*Lambda
    gradX = np.dot((diff*R),Theta) + Lambda*X
    gradTheta = np.dot((diff*R).transpose(), X) + Lambda*Theta
    return [J, np.concatenate((gradX, gradTheta), axis = None)] #make grad as one col

In [193]:
nmov = 5
nuser = 4
nfea = 3
J, grad = cofiCostFunc(np.concatenate((X[0:nmov,0:nfea], Theta[0:nuser, 0:nfea]), axis = None),\
                 Y[0:nmov, 0:nuser], R[0:nmov, 0:nuser], nuser, nmov, nfea,0)
print('cost at loaded parameters:',J)

cost at loaded parameters: 22.224603725685675


## 2.2 Collaborative filtering gradient

In [194]:
J, grad = cofiCostFunc(np.concatenate((X[0:nmov,0:nfea], Theta[0:nuser, 0:nfea]), axis = None),\
                 Y[0:nmov, 0:nuser], R[0:nmov, 0:nuser], nuser, nmov, nfea,1.5)

In [199]:
print('J:',J)
print('grad.shape:', grad.shape)

J: 34821.703613072226
grad.shape: (26250,)


---
## 2.3 Gradient Checking

### 2.3.1 Create small size sets

In [223]:
num_movies = 4
num_features = 3
num_users = 5
X_t = np.random.rand(num_movies,num_features)
Theta_t = np.random.rand(num_users,num_features)
Y = np.dot(X_t, Theta_t.transpose())
print('X:',X_t)
print('Theta:',Theta_t)
print('Y:',Y)

X: [[0.72403614 0.16705117 0.26245705]
 [0.31477798 0.64370735 0.86229404]
 [0.38275277 0.2977935  0.46448874]
 [0.14330783 0.0114652  0.51363458]]
Theta: [[0.02709974 0.3981081  0.46070301]
 [0.98525788 0.11735834 0.52835058]
 [0.20620938 0.25551629 0.25713904]
 [0.37327124 0.39185163 0.32559285]
 [0.358049   0.92062236 0.46216462]]
Y: [[0.20704036 0.87163649 0.25947529 0.42117528 0.53432982]
 [0.66205697 0.84127547 0.45111735 0.65049211 1.10383912]
 [0.34291786 0.65747162 0.27445649 0.41079568 0.62586986]
 [0.24508099 0.41391983 0.16455647 0.22522109 0.29925007]]


### 2.3.2 Create missing data

In [224]:
Y[Y>0.5]=0
R = np.zeros(Y.shape)
R[Y != 0]=1
print('Y:',Y)
print('R:',R)

Y: [[0.20704036 0.         0.25947529 0.42117528 0.        ]
 [0.         0.         0.45111735 0.         0.        ]
 [0.34291786 0.         0.27445649 0.41079568 0.        ]
 [0.24508099 0.41391983 0.16455647 0.22522109 0.29925007]]
R: [[1. 0. 1. 1. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 1. 1. 0.]
 [1. 1. 1. 1. 1.]]


In [225]:
X = np.random.rand(num_movies,num_features)
Theta = np.random.rand(num_users,num_features)

### 2.3.3 Implement Gradient Checking

In [203]:
import math

In [217]:
theta = np.concatenate((X,Theta), axis = None)

In [226]:
def computeNumericalGradient(theta, num_users,num_movies, num_features, Lambda):
    numgrad = np.zeros(theta.size)
    e = math.exp(1e-4)
    perturb = np.zeros(theta.size)
    for i in range(0, theta.size):
        perturb[i] = e
        tmp = theta - perturb
        loss1, grad = cofiCostFunc(tmp, Y, R, num_users, num_movies, num_features, Lambda)
        tmp = theta + perturb
        loss2, grad = cofiCostFunc(tmp, Y, R, num_users, num_movies, num_features, Lambda)
        numgrad[i] = (loss2-loss1)/(2*e)
        perturb[i] = 0
    return numgrad

In [271]:
numgrad = computeNumericalGradient(theta, num_users, num_movies, num_features, 0)
numgrad

array([1.62449465, 1.92444568, 0.36878176, 0.64536909, 0.71017564,
       0.03097763, 1.21658585, 1.41094528, 0.25215686, 2.24883278,
       2.71628978, 1.04111987, 0.8975856 , 0.91120064, 0.82854109,
       0.16579596, 0.42093921, 0.12340519, 1.47094403, 1.67735364,
       1.40036094, 1.03821183, 1.08574674, 0.95131028, 0.10836773,
       0.27513473, 0.08066023])

In [272]:
J, grad = cofiCostFunc(theta, Y, R, num_users, num_movies, num_features, 0)
grad

array([1.62449465, 1.92444568, 0.36878176, 0.64536909, 0.71017564,
       0.03097763, 1.21658585, 1.41094528, 0.25215686, 2.24883278,
       2.71628978, 1.04111987, 0.8975856 , 0.91120064, 0.82854109,
       0.16579596, 0.42093921, 0.12340519, 1.47094403, 1.67735364,
       1.40036094, 1.03821183, 1.08574674, 0.95131028, 0.10836773,
       0.27513473, 0.08066023])

In [273]:
diff = np.linalg.norm(numgrad - grad)/(np.linalg.norm(numgrad+grad))
print('relative difference should be less 1e-9:  ', diff)

relative difference should be less 1e-9:   1.2744648693842892e-16


### 2.3.4 Regularized gradient checking

In [274]:
numgrad = computeNumericalGradient(theta, num_users, num_movies, num_features, 1.5)
numgrad

array([2.74192039, 2.39813995, 1.51830284, 1.49665158, 1.83471939,
       0.92438267, 1.92717893, 2.10213207, 0.8475189 , 2.69958127,
       3.86069469, 1.37662082, 1.58949466, 2.28229142, 1.30541143,
       0.83971107, 1.65013252, 1.50123568, 2.74753226, 3.08213407,
       1.46163701, 2.51532712, 2.43369066, 1.21324519, 1.01888391,
       1.02891388, 0.71159343])

In [275]:
J, grad = cofiCostFunc(theta, Y, R, num_users, num_movies, num_features, 1.5)
grad

array([2.74192039, 2.39813995, 1.51830284, 1.49665158, 1.83471939,
       0.92438267, 1.92717893, 2.10213207, 0.8475189 , 2.69958127,
       3.86069469, 1.37662082, 1.58949466, 2.28229142, 1.30541143,
       0.83971107, 1.65013252, 1.50123568, 2.74753226, 3.08213407,
       1.46163701, 2.51532712, 2.43369066, 1.21324519, 1.01888391,
       1.02891388, 0.71159343])

In [276]:
diff = np.linalg.norm(numgrad - grad)/(np.linalg.norm(numgrad+grad))
print('relative difference should be less 1e-9:  ', diff)

relative difference should be less 1e-9:   1.815838000409585e-16


---
## 2.4 Learning movie recommendations

In [339]:
f = open("movie_ids.txt", mode ="rb")

In [340]:
for s in f.readlines():
    print('movies:',s.decode())

movies: 1 Toy Story (1995)

movies: 2 GoldenEye (1995)

movies: 3 Four Rooms (1995)

movies: 4 Get Shorty (1995)

movies: 5 Copycat (1995)

movies: 6 Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)

movies: 7 Twelve Monkeys (1995)

movies: 8 Babe (1995)

movies: 9 Dead Man Walking (1995)

movies: 10 Richard III (1995)

movies: 11 Seven (Se7en) (1995)

movies: 12 Usual Suspects, The (1995)

movies: 13 Mighty Aphrodite (1995)

movies: 14 Postino, Il (1994)

movies: 15 Mr. Holland's Opus (1995)

movies: 16 French Twist (Gazon maudit) (1995)

movies: 17 From Dusk Till Dawn (1996)

movies: 18 White Balloon, The (1995)

movies: 19 Antonia's Line (1995)

movies: 20 Angels and Insects (1995)

movies: 21 Muppet Treasure Island (1996)

movies: 22 Braveheart (1995)

movies: 23 Taxi Driver (1976)

movies: 24 Rumble in the Bronx (1995)

movies: 25 Birdcage, The (1996)

movies: 26 Brothers McMullen, The (1995)

movies: 27 Bad Boys (1995)

movies: 28 Apollo 13 (1995)

movies: 29 Batman Forever (

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 7: invalid continuation byte