In [214]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.io

data = scipy.io.loadmat("ex8_movies.mat")
Y = data["Y"] # num_movies x num_users
R = data["R"] # num_movies x num_users

print("Y:\n", Y, "\n\nR:\n", R)
print("\nAverage rating for movie 1 (Toy Story): ", np.mean(Y[0, np.nonzero(R[0])]))
# np.nonzero returns list of indices where element in R is non-zero

Y:
 [[5 4 0 ..., 5 0 0]
 [3 0 0 ..., 0 0 5]
 [4 0 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]] 

R:
 [[1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 0 1]
 [1 0 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]]

Average rating for movie 1 (Toy Story):  3.87831858407


In [215]:
weights = scipy.io.loadmat("ex8_movieParams.mat")

X = weights["X"]
# i-th row of X corresponds to feature vector x^(i) for i-th movie
# in this case, x^(i) has 100 features
# therefore num_movies x 100 matrix

Theta = weights["Theta"]
# j-th row of Theta corresponds to parameter vector Theta^(j) for j-th user
# in this case, Theta^(i) also has 100 features
# therefore num_users x 100 matrix

In [216]:
# reducing data set to run faster
num_users = 4
num_movies = 5
num_features = 3

X = X[:num_movies, :num_features]
Theta = Theta[:num_users, :num_features]
Y = Y[:num_movies, :num_users]
R = R[:num_movies, :num_users]

params = np.concatenate((np.ravel(X),np.ravel(Theta)))

params

array([ 1.0486855 , -0.40023196,  1.19411945,  0.78085123, -0.38562591,
        0.52119779,  0.64150886, -0.54785385, -0.08379638,  0.45361782,
       -0.80021844,  0.68048129,  0.93753789,  0.1060899 ,  0.36195295,
        0.28544362, -1.68426509,  0.26293877,  0.50501321, -0.45464846,
        0.31746244, -0.43191656, -0.47880449,  0.84671111,  0.72859839,
       -0.27189391,  0.3268436 ])

In [217]:
def cofiCostFunc(params, Y, R, num_users, num_movies, num_features,lamda):
    
    # recreate matrices from (unrolled) params
    X = np.reshape(params[:num_movies*num_features], (num_movies, num_features))
    Theta = np.reshape(params[num_movies*num_features:], (num_users, num_features))
    
    movie_by_user_table = np.dot(X, Theta.T) 
    
    movie_by_user_table = np.multiply(movie_by_user_table, R) 
    Y = np.multiply(Y, R)
    # element-wise multiplication by R to get the 
    # ratings as specified and required by R

    error_table = movie_by_user_table - Y
    cost = np.sum(np.square(error_table)) / 2
    
    # regularization
    X_reg = np.sum(np.square(X)) * lamda / 2
    Theta_reg = np.sum(np.square(Theta)) * lamda / 2
    
    return cost + X_reg + Theta_reg

print("Cost with lambda = 0: ", 
      cofiCostFunc(params, Y, R, num_users, num_movies, num_features, 0))
print("Cost with lambda = 1.5: ", 
      cofiCostFunc(params, Y, R, num_users, num_movies, num_features, 1.5))

Cost with lambda = 0:  22.2246037257
Cost with lambda = 1.5:  31.3440562443


In [218]:
def cofiCostGrad(params, Y, R, num_users, num_movies, num_features,lamda):
    
    # recreate matrices from (unrolled) params
    X = np.reshape(params[:num_movies*num_features], (num_movies, num_features))
    Theta = np.reshape(params[num_movies*num_features:], (num_users, num_features))
    
    movie_by_user_table = np.dot(X, Theta.T) 
    
    movie_by_user_table = np.multiply(movie_by_user_table, R) 
    Y = np.multiply(Y, R)
    # element-wise multiplication by R to get the 
    # ratings as specified and required by R

    error_table = movie_by_user_table - Y
    
    # gradients
    X_grad = np.dot(error_table, Theta) + lamda*X
    Theta_grad = np.dot(error_table.T, X) + lamda*Theta
    
    # regularization
    
    return np.concatenate((X_grad.ravel(), Theta_grad.ravel()))

gradients = cofiCostGrad(params, Y, R, num_users, num_movies, num_features, 0)
X_grad = np.reshape(gradients[:num_movies*num_features], (num_movies, num_features))
Theta_grad = np.reshape(gradients[num_movies*num_features:], (num_users, num_features))
    
print(X_grad)
print(Theta_grad)

[[-2.52899165  7.57570308 -1.89979026]
 [-0.56819597  3.35265031 -0.52339845]
 [-0.83240713  4.91163297 -0.76677878]
 [-0.38358278  2.26333698 -0.35334048]
 [-0.80378006  4.74271842 -0.74040871]]
[[-10.5680202    4.62776019  -7.16004443]
 [ -3.05099006   1.16441367  -3.47410789]
 [  0.           0.           0.        ]
 [  0.           0.           0.        ]]


In [219]:
def gradientCheck(params,Y, R, num_users, num_movies, num_features,lamda):

    # note that backprop returns a VECTOR of gradients
    # whereas nnCostFunction returns a NUMBER
    
    n = len(params)
    epsilon = 0.0001
    gradApprox = []
    for j in range(n):
        thetaPlus = np.copy(params)
        thetaPlus[j] += epsilon
        thetaMinus = np.copy(params)
        thetaMinus[j] -= epsilon
        thetaPlusCost = cofiCostFunc(thetaPlus,Y, R, num_users, num_movies, num_features,lamda)
        thetaMinusCost = cofiCostFunc(thetaMinus,Y, R, num_users, num_movies, num_features,lamda)
        gradApprox.append((thetaPlusCost - thetaMinusCost) / (2*epsilon))
    return gradApprox

approx_grads = gradientCheck(params, Y, R, num_users, num_movies, num_features, 0)
computed_grads = np.concatenate((X_grad.ravel(), Theta_grad.ravel()))

df = pd.DataFrame({"computed gradient": computed_grads,
                   "approx gradient": approx_grads,  
                   "difference": approx_grads - computed_grads})
df

Unnamed: 0,approx gradient,computed gradient,difference
0,-2.528992,-2.528992,-8.993695e-12
1,7.575703,7.575703,-1.076383e-11
2,-1.89979,-1.89979,1.394573e-11
3,-0.568196,-0.568196,2.260858e-12
4,3.35265,3.35265,6.038281e-12
5,-0.523398,-0.523398,7.516099e-12
6,-0.832407,-0.832407,8.02125e-12
7,4.911633,4.911633,-7.728929e-12
8,-0.766779,-0.766779,-3.846923e-13
9,-0.383583,-0.383583,-6.675605e-12


In [221]:
gradients2 = cofiCostGrad(params, Y, R, num_users, num_movies, num_features, 1.5)
approx_grads2 = gradientCheck(params, Y, R, num_users, num_movies, num_features, 1.5)

df = pd.DataFrame({"computed gradient": gradients2,
                   "approx gradient": approx_grads2,  
                   "difference": approx_grads2 - gradients2})
df

Unnamed: 0,approx gradient,computed gradient,difference
0,-0.955963,-0.955963,-8.278045e-12
1,6.975355,6.975355,-1.377121e-11
2,-0.108611,-0.108611,2.260192e-11
3,0.603081,0.603081,6.078249e-12
4,2.774211,2.774211,6.099121e-12
5,0.258398,0.258398,1.314182e-11
6,0.129856,0.129856,5.73952e-12
7,4.089852,4.089852,-7.208456e-12
8,-0.892473,-0.892473,-3.664402e-12
9,0.296844,0.296844,-1.194028e-11


In [222]:
with open("movie_ids.txt", "r", encoding = "ISO-8859-1") as f:
    contents = f.read().split("\n")

movieDf = pd.DataFrame([line.split(" ", 1) for line in contents])
movieDf.columns = ["index (in Octave)", "title"]
movieDf

Unnamed: 0,index (in Octave),title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)
5,6,Shanghai Triad (Yao a yao yao dao waipo qiao) ...
6,7,Twelve Monkeys (1995)
7,8,Babe (1995)
8,9,Dead Man Walking (1995)
9,10,Richard III (1995)


In [223]:
my_ratings = np.zeros((1682,1))
my_ratings[0]   = 4
my_ratings[97]  = 2
my_ratings[6]   = 3
my_ratings[11]  = 5
my_ratings[53]  = 4
my_ratings[63]  = 5
my_ratings[65]  = 3
my_ratings[68]  = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354] = 5

for i in range(1682):
    if my_ratings[i] > 0:
        print("Rated %s for %s" % (my_ratings[i][0], movieDf["title"][i]))

Rated 4.0 for Toy Story (1995)
Rated 3.0 for Twelve Monkeys (1995)
Rated 5.0 for Usual Suspects, The (1995)
Rated 4.0 for Outbreak (1995)
Rated 5.0 for Shawshank Redemption, The (1994)
Rated 3.0 for While You Were Sleeping (1995)
Rated 5.0 for Forrest Gump (1994)
Rated 2.0 for Silence of the Lambs, The (1991)
Rated 4.0 for Alien (1979)
Rated 5.0 for Die Hard 2 (1990)
Rated 5.0 for Sphere (1998)


In [240]:
# training collaborative filtering
data = scipy.io.loadmat("ex8_movies.mat")
Y = data["Y"] # num_movies x num_users
R = data["R"] # num_movies x num_users

Y = np.concatenate((my_ratings, Y), axis=1)
R = np.concatenate(((my_ratings > 0).astype(int), R), axis=1)

def normalizeRatings(Y,R):
    m = Y.shape[0]
    Ymean = np.zeros(m)
    Ynorm = np.zeros(Y.shape)
    for i in range(m):
        idx = np.nonzero(R[i])
        Ymean[i] = np.mean(Y[i, idx])
        Ynorm[i, idx] = Y[i, idx] - Ymean[i]
    return Ynorm, Ymean

Ynorm, Ymean = normalizeRatings(Y,R)
print("Ymean:\n", Ymean, "\n\nYnorm:\n", Ynorm)

Ymean:
 [ 3.8785872   3.20610687  3.03333333 ...,  2.          3.          3.        ] 

Ynorm:
 [[ 0.1214128   1.1214128   0.1214128  ...,  1.1214128   0.          0.        ]
 [ 0.         -0.20610687  0.         ...,  0.          0.          1.79389313]
 [ 0.          0.96666667  0.         ...,  0.          0.          0.        ]
 ..., 
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.          0.          0.         ...,  0.          0.          0.        ]]


In [225]:
num_users = Y.shape[1]
num_movies = Y.shape[0]
num_features = 10

X = np.random.rand(num_movies, num_features)
Theta = np.random.rand(num_users, num_features)

inital_parameters = np.concatenate((X.ravel(), Theta.ravel()))

from scipy.optimize import fmin_tnc
lamda = 10
minimum = fmin_tnc(func=cofiCostFunc, x0=inital_parameters, fprime=cofiCostGrad, 
                   args=(Y, R, num_users, num_movies, num_features,lamda))


In [258]:
X = np.reshape(minimum[0][:num_movies*num_features],
               (num_movies, num_features))
Theta = np.reshape(minimum[0][num_movies*num_features:],
                   (num_users, num_features))

p = np.dot(X, Theta.T)
my_predictions = p[:,0] + Ymean
pred_ind_sorted = np.argsort(-my_predictions)

print("Top recommendations for you:")
for i in pred_ind_sorted[:10]:
    print("Predicting rating %s for movie %s" 
          % (my_predictions[i], movieDf["title"][i]))

Top recommendations for you:
Predicting rating 8.55250067439 for movie Star Wars (1977)
Predicting rating 8.4476138833 for movie Titanic (1997)
Predicting rating 8.36037270822 for movie Shawshank Redemption, The (1994)
Predicting rating 8.25554569465 for movie Schindler's List (1993)
Predicting rating 8.23860312834 for movie Raiders of the Lost Ark (1981)
Predicting rating 8.17244959884 for movie Good Will Hunting (1997)
Predicting rating 8.11277406646 for movie Usual Suspects, The (1995)
Predicting rating 8.10973655322 for movie Empire Strikes Back, The (1980)
Predicting rating 8.06530480566 for movie Godfather, The (1972)
Predicting rating 8.05712577671 for movie Braveheart (1995)
