# Recommender Systems

## 1. Movie Ratings dataset

In [1]:
import scipy.io

In [2]:
mat = scipy.io.loadmat('ex8_movies.mat')

In [82]:
mat.keys()

dict_keys(['__header__', '__version__', '__globals__', 'Y', 'R'])

In [83]:
Y = mat['Y'] #row: movies, col: people
Y.shape

(1682, 943)

In [84]:
R = mat['R']
R.shape

(1682, 943)

In [6]:
import numpy as np

In [7]:
index = np.argwhere(R[0]==1)
av = np.mean(Y[0, index])
print('average rating for movie 1 (toy story):',av)

average rating for movie 1 (toy story): 3.8783185840707963


In [8]:
y = np.mat(Y)

In [9]:
import cv2

In [10]:
ys = cv2.resize(y, (540,480))
ys = ys/np.max(y)
cv2.imshow('image',ys)
cv2.waitKey(0)
cv2.destroyAllWindows()

## 2. Collaborative filtering learning algorithm

### 2.1 Collaborative filtering cost function

In [11]:
mat2 = scipy.io.loadmat('ex8_movieParams.mat')

In [12]:
mat2.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'Theta', 'num_users', 'num_movies', 'num_features'])

In [170]:
X = mat2['X']
Theta = mat2['Theta']
num_users = mat2['num_users'][0][0]
num_movies = mat2['num_movies'][0][0]
num_features = mat2['num_features'][0][0]
print('X:',X.shape)
print('Theta:',Theta.shape)
print('num_users:',num_users)
print('num_movies:',num_movies)
print('num_features:',num_features)

X: (1682, 10)
Theta: (943, 10)
num_users: 943
num_movies: 1682
num_features: 10


In [171]:
param = np.concatenate((X,Theta), axis = None)
param.shape

(26250,)

In [176]:
theta = param[num_movies*num_features:].reshape(num_users, num_features)
theta

array([[ 0.28544362, -1.68426509,  0.26293877, ...,  0.76723235,
        -1.10460164, -0.25186708],
       [ 0.50501321, -0.45464846,  0.31746244, ...,  1.09306336,
        -1.20029436, -0.39161676],
       [-0.43191656, -0.47880449,  0.84671111, ...,  1.36333976,
        -0.32523542, -0.19468212],
       ...,
       [ 1.0586926 , -0.8087176 ,  0.56935771, ...,  0.80421422,
        -0.74346778, -0.11985885],
       [ 0.89058932, -0.12894734,  0.35560466, ...,  1.37426807,
        -0.7526549 , -0.81135311],
       [ 0.82414136, -0.38892594,  1.06940502, ...,  1.28423703,
        -0.98349993, -0.53184838]])

In [177]:
Theta

array([[ 0.28544362, -1.68426509,  0.26293877, ...,  0.76723235,
        -1.10460164, -0.25186708],
       [ 0.50501321, -0.45464846,  0.31746244, ...,  1.09306336,
        -1.20029436, -0.39161676],
       [-0.43191656, -0.47880449,  0.84671111, ...,  1.36333976,
        -0.32523542, -0.19468212],
       ...,
       [ 1.0586926 , -0.8087176 ,  0.56935771, ...,  0.80421422,
        -0.74346778, -0.11985885],
       [ 0.89058932, -0.12894734,  0.35560466, ...,  1.37426807,
        -0.7526549 , -0.81135311],
       [ 0.82414136, -0.38892594,  1.06940502, ...,  1.28423703,
        -0.98349993, -0.53184838]])

In [234]:
def cofiCostFunc(param, Y, R, num_users, num_movies, num_features):
    X = param[:num_movies*num_features].reshape(num_movies, num_features)
    Theta = param[num_movies*num_features:].reshape(num_users, num_features)
    diff = np.dot(X,Theta.transpose()) - Y
    J = np.sum( np.square( diff*R ))*0.5
    gradX = np.dot((diff*R),Theta)
    gradTheta = np.dot((diff*R).transpose(), X)
    return [J, np.concatenate((gradX, gradTheta), axis = None)] #make grad as one col

In [228]:
nmov = 5
nuser = 4
nfea = 3
J, grad = cofiCostFunc(np.concatenate((X[0:nmov,0:nfea], Theta[0:nuser, 0:nfea]), axis = None),\
                 Y[0:nmov, 0:nuser], R[0:nmov, 0:nuser], nuser, nmov, nfea)
print('cost at loaded parameters:',J)

ValueError: cannot reshape array of size 9 into shape (4,3)

## 2.2 Collaborative filtering gradient

In [187]:
J, grad = cofiCostFunc(np.concatenate((X, Theta), axis = None), Y, R, num_users, num_movies, num_features)

In [188]:
print('J:',J)
print('grad.shape:', grad.shape)

J: 27918.64012454421
grad.shape: (26250,)


## 2.3 Gradient Checking

### 2.3.1 Create small size sets

In [257]:
num_movies = 4
num_features = 3
num_users = 5
X_t = np.random.rand(num_movies,num_features)
Theta_t = np.random.rand(num_users,num_features)
Y = np.dot(X_t, Theta_t.transpose())
print('X:',X_t)
print('Theta:',Theta_t)
print('Y:',Y)

X: [[0.41726386 0.71264126 0.75977636]
 [0.04533233 0.25978974 0.99071713]
 [0.25367465 0.1599984  0.68201261]
 [0.01836834 0.48924468 0.80094903]]
Theta: [[0.9520208  0.45507959 0.05173047]
 [0.67904125 0.85158138 0.29265336]
 [0.95867711 0.5524942  0.76815891]
 [0.23078723 0.34677749 0.97503702]
 [0.16861148 0.76262338 0.12070809]]
Y: [[0.76085596 1.1125625  1.37738045 1.08423719 0.70554352]
 [0.21263259 0.54195132 0.94801958 1.06653724 0.32535286]
 [0.34959639 0.5081005  0.85548434 0.77901626 0.24711542]
 [0.28156578 0.66350494 0.90317028 0.95485317 0.47288757]]


### 2.3.2 Create missing data

In [258]:
Y[Y>0.5]=0
R = np.zeros(Y.shape)
R[Y != 0]=1
print('Y:',Y)
print('R:',R)

Y: [[0.         0.         0.         0.         0.        ]
 [0.21263259 0.         0.         0.         0.32535286]
 [0.34959639 0.         0.         0.         0.24711542]
 [0.28156578 0.         0.         0.         0.47288757]]
R: [[0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 0. 0. 1.]]


In [259]:
X = np.random.rand(num_movies,num_features)
Theta = np.random.rand(num_users,num_features)

### 2.3.3 Implement Gradient Checking

In [268]:
theta = np.concatenate((X,Theta), axis = None)
numgrad = np.zeros(theta.size)
e = math.exp(1e-4)
perturb = np.zeros(theta.size)
for i in range(0, theta.size):
    perturb[i] = e
    tmp = theta - perturb
    loss1, grad = cofiCostFunc(tmp, Y, R, num_users, num_movies, num_features)
    tmp = theta + perturb
    loss2, grad = cofiCostFunc(tmp, Y, R, num_users, num_movies, num_features)
    numgrad[i] = (loss2-loss1)/(2*e)
    perturb[i] = 0

In [269]:
numgrad

array([0.        , 0.        , 0.        , 0.55319049, 0.15676137,
       0.71256401, 0.18711234, 0.04448443, 0.20500591, 0.28147406,
       0.09380208, 0.42177617, 1.17878291, 0.97726549, 1.18798178,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.46950719,
       0.47147721, 0.56878213])

In [270]:
J, grad = cofiCostFunc(theta, Y, R, num_users, num_movies, num_features)
grad

array([0.        , 0.        , 0.        , 0.55319049, 0.15676137,
       0.71256401, 0.18711234, 0.04448443, 0.20500591, 0.28147406,
       0.09380208, 0.42177617, 1.17878291, 0.97726549, 1.18798178,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.46950719,
       0.47147721, 0.56878213])

In [271]:
diff = np.linalg.norm(numgrad - grad)/(np.linalg.norm(numgrad+grad))
print('relative difference should be less 1e-9:  ', diff)

relative difference should be less 1e-9:   1.390843401057588e-16
