In [1]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np

# Plotting library
from matplotlib import pyplot
import matplotlib as mpl

# Optimization module in scipy
from scipy import optimize

# will be used to load MATLAB mat datafile format
from scipy.io import loadmat,savemat

# library written for this exercise providing additional functions for assignment submission, and others
import utils

# tells matplotlib to embed plots within the notebook
%matplotlib inline

In [2]:
def cofiCostFunc(params, Y, R, num_users, num_movies,
                      num_features, lambda_=0.0):
    """Collaborative-filtering cost and gradient for a flat parameter vector.

    Parameters
    ----------
    params : 1-D array
        The first ``num_movies * num_features`` entries are reshaped into
        X (num_movies x num_features); the remaining entries are reshaped
        into Theta (num_users x num_features).
    Y : array
        Ratings, subtracted element-wise from ``X @ Theta.T`` (so it must be
        compatible with shape num_movies x num_users).
    R : array
        Element-wise multiplier applied to the prediction error; entries
        equal to 0 remove the corresponding rating from cost and gradient.
    num_users, num_movies, num_features : int
        Dimensions used to unfold ``params``.
    lambda_ : float, optional
        Regularization strength applied to both X and Theta (default 0.0).

    Returns
    -------
    J : float
        Half the masked sum of squared errors plus the regularization term.
    grad : 1-D array
        Gradient with respect to ``params``, laid out like ``params``
        (X part first, then Theta part).
    """
    # Unfold the X and Theta matrices from params
    split = num_movies * num_features
    X = params[:split].reshape(num_movies, num_features)
    Theta = params[split:].reshape(num_users, num_features)

    # The prediction error is needed by the cost and by both gradients;
    # compute the matrix product once instead of three times per call.
    err = X @ Theta.T - Y
    masked_err = err * R

    J = np.sum((err ** 2) * R) / 2
    cost_reg = (lambda_ / 2) * (np.sum(Theta ** 2) + np.sum(X ** 2))
    J = J + cost_reg

    X_grad = masked_err @ Theta + lambda_ * X
    Theta_grad = masked_err.T @ X + lambda_ * Theta

    grad = np.concatenate([X_grad.ravel(), Theta_grad.ravel()])
    return J, grad

In [3]:
def loadData():
    """Load the ``Y`` and ``R`` arrays from ``Data/ex8_movies.mat``.

    Prints a confirmation message once both arrays have been read.

    Returns
    -------
    tuple
        ``(Y, R)`` exactly as stored under those keys in the MAT file.
    """
    mat_path = os.path.join('Data', 'ex8_movies.mat')
    contents = loadmat(mat_path)
    Y_mat = contents['Y']
    R_mat = contents['R']
    print("Data loaded!")
    return Y_mat, R_mat

In [4]:
def saveData(Y,R):
    """Write ``Y`` and ``R`` to ``Data/ex8_movies.mat`` under the keys 'Y' and 'R'.

    Note that this is the same path ``loadData`` reads from, so any existing
    file at that location is replaced. Prints a confirmation when done.
    """
    target = os.path.join('Data', 'ex8_movies.mat')
    savemat(target, {'Y': Y, 'R': R})
    print("Data saved!")
    

In [5]:
def loadParams():
    """Load stored model parameters from ``Data/ex8_movieParams.mat``.

    Prints a confirmation message after the file has been read.

    Returns
    -------
    tuple
        ``(X, Theta, num_users, num_movies, num_features)``, each value taken
        as-is from the loaded MAT dictionary (no type conversion is applied).
    """
    contents = loadmat(os.path.join('Data', 'ex8_movieParams.mat'))
    print("Parameters loaded!")
    wanted = ('X', 'Theta', 'num_users', 'num_movies', 'num_features')
    return tuple(contents[key] for key in wanted)

In [6]:

def testParams():
    """Evaluate the cost function on a small slice of the stored parameters.

    Loads the saved parameters and the rating data, truncates everything to
    5 movies, 4 users and 3 features so the check runs quickly, and prints
    the cost returned by ``cofiCostFunc`` with its default regularization.
    The sizes stored alongside the parameters are ignored in favour of the
    reduced sizes below.
    """
    X_full, Theta_full, _, _, _ = loadParams()
    Y_full, R_full = loadData()

    # Reduced problem size
    num_users, num_movies, num_features = 4, 5, 3

    X_small = X_full[:num_movies, :num_features]
    Theta_small = Theta_full[:num_users, :num_features]
    Y_small = Y_full[:num_movies, 0:num_users]
    R_small = R_full[:num_movies, 0:num_users]

    # Flatten the parameters the way cofiCostFunc expects: X first, then Theta
    flat_params = np.concatenate([X_small.ravel(), Theta_small.ravel()])
    J, _ = cofiCostFunc(flat_params, Y_small, R_small,
                        num_users, num_movies, num_features)

    print('Cost at loaded parameters:  %.2f' % J)

In [7]:
# Run the quick check: loads the stored parameters and ratings, evaluates
# cofiCostFunc on a reduced slice, and prints the resulting cost.
testParams()

Parameters loaded!
Data loaded!
Cost at loaded parameters:  22.22


In [8]:
def saveParams(X, Theta, num_users, num_movies, num_features): # save parameters to file
    """Write model parameters to ``Data/ex8_movieParams.mat``.

    Each argument is stored under a key equal to its own name ('X', 'Theta',
    'num_users', 'num_movies', 'num_features'). Prints a confirmation when
    the file has been written.
    """
    payload = {
        'X': X,
        'Theta': Theta,
        'num_users': num_users,
        'num_movies': num_movies,
        'num_features': num_features,
    }
    destination = os.path.join('Data', 'ex8_movieParams.mat')
    savemat(destination, payload)
    print("Parameters saved!")


In [92]:
def predict(new_ratings,_id, seed=None, top_n=20):
    """Append a user's ratings, retrain the model, and print recommendations.

    The stored rating data is loaded, ``new_ratings`` is appended as an extra
    user column, the model is fitted from a random start with
    ``scipy.optimize.minimize`` (TNC, at most 100 iterations, lambda_ = 10,
    10 features), and the highest predicted ratings for user column ``_id``
    are printed.

    Parameters
    ----------
    new_ratings : array-like, 1-D
        One rating per movie; entries greater than 0 are treated as rated.
        Its length must match the number of rows of the loaded ``Y``.
    _id : int
        Column index (user) of the prediction matrix to report. For the
        newly appended user this is the last column.
    seed : int or None, optional
        When given, the initial X and Theta are drawn from
        ``np.random.RandomState(seed)`` so the starting point is repeatable.
        When None (default), the global ``np.random`` state is used, as before.
    top_n : int, optional
        Maximum number of recommendations to print (default 20). Fewer are
        printed when the movie list is shorter.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    Y,R = loadData()

    # Accept lists as well as arrays; shape the ratings as a single column.
    new_column = np.asarray(new_ratings)[:, None]
    Y = np.hstack([Y, new_column])
    R = np.hstack([R, new_column > 0])
    print("Ratings added!")

    #  Normalize Ratings
    # NOTE(review): utils.normalizeRatings presumably centres each movie's
    # ratings and returns the per-movie mean; confirm against utils.
    Ynorm, Ymean = utils.normalizeRatings(Y, R)

    #  Useful Values
    num_movies, num_users = Y.shape
    num_features = 10

    # Set Initial Parameters (Theta, X)
    # Use a private generator only when a seed is requested so that the
    # default path keeps drawing from the global numpy random state.
    rng = np.random if seed is None else np.random.RandomState(seed)
    X = rng.randn(num_movies, num_features)
    Theta = rng.randn(num_users, num_features)

    initial_parameters = np.concatenate([X.ravel(), Theta.ravel()])

    # Set options for scipy.optimize.minimize
    options = {'maxiter': 100}

    # Set Regularization
    lambda_ = 10
    # jac=True: cofiCostFunc returns (cost, gradient) in one call.
    res = optimize.minimize(lambda x: cofiCostFunc(x, Ynorm, R, num_users,
                                                   num_movies, num_features, lambda_),
                            initial_parameters,
                            method='TNC',
                            jac=True,
                            options=options)
    theta = res.x

    # Unfold the returned vector back into X and Theta
    X = theta[:num_movies*num_features].reshape(num_movies, num_features)
    Theta = theta[num_movies*num_features:].reshape(num_users, num_features)

    print('Recommender system learning completed.')

    p = np.dot(X, Theta.T)
    print(p.shape)
    # Add back the mean returned by the normalization step.
    my_predictions = p[:, _id] + Ymean

    movieList = utils.loadMovieList()

    # Movie indices ordered from highest to lowest predicted rating.
    ix = np.argsort(my_predictions)[::-1]

    print('Top recommendations for you:')
    print('----------------------------')
    # Slicing (rather than indexing a fixed count) avoids an IndexError when
    # there are fewer than top_n movies.
    for j in ix[:top_n]:
        print('Predicting rating %.1f for movie %s' % (my_predictions[j], movieList[j]))

    


In [94]:

# One rating slot per movie in the list; 0 means "not rated"
# (predict() only marks entries > 0 as rated).
movieList = utils.loadMovieList()
n_m = len(movieList)

# Movie index -> rating supplied by the new user.
# Index 97 was annotated as "Silence of the Lambs (1991)" in the original notes.
ratings_by_index = {
    0: 2,
    97: 1,
    6: 3,
    11: 1,
    53: 1,
    63: 1,
    65: 1,
    68: 1,
    182: 1,
    225: 1,
    354: 1,
    600: 5,
}

my_ratings = np.zeros(n_m)
my_ratings[list(ratings_by_index)] = list(ratings_by_index.values())

# 943 is the column of the appended user: predict() reports a prediction
# matrix with 944 user columns after adding these ratings.
predict(my_ratings,943)

Data loaded!
Ratings added!
Recommender system learning completed.
(1665, 944)
Top recommendations for you:
----------------------------
Predicting rating 5.1 for movie Star Kid
Predicting rating 5.1 for movie Entertaining Angels: The Dorothy Day Story
Predicting rating 5.0 for movie They Made Me a Criminal
Predicting rating 5.0 for movie Someone Else's America
Predicting rating 5.0 for movie Great Day in Harlem A
Predicting rating 4.9 for movie Santa with Muscles
Predicting rating 4.9 for movie Saint of Fort Washington
Predicting rating 4.9 for movie Marlene Dietrich: Shadow and Light
Predicting rating 4.9 for movie Prefontaine
Predicting rating 4.6 for movie Some Mother's Son
Predicting rating 4.5 for movie Everest
Predicting rating 4.5 for movie Letter From Death Row A
Predicting rating 4.5 for movie Pather Panchali
Predicting rating 4.4 for movie Kaspar Hauser
Predicting rating 4.4 for movie Sunset Blvd.
Predicting rating 4.4 for movie Anna
Predicting rating 4.3 for movie Wallace &

In [95]:
# One rating slot per movie in the list; 0 means "not rated"
# (predict() only marks entries > 0 as rated).
movieList = utils.loadMovieList()
n_m = len(movieList)

# Movie index -> rating supplied by the new user (second scenario).
# Index 97 was annotated as "Silence of the Lambs (1991)" in the original notes.
ratings_by_index = {
    0: 4,
    97: 2,
    6: 3,
    11: 5,
    53: 4,
    63: 5,
    65: 3,
    68: 5,
    182: 4,
    225: 5,
    354: 5,
}

my_ratings = np.zeros(n_m)
my_ratings[list(ratings_by_index)] = list(ratings_by_index.values())

# 943 is the column of the appended user: predict() reports a prediction
# matrix with 944 user columns after adding these ratings.
predict(my_ratings,943)

Data loaded!
Ratings added!
Recommender system learning completed.
(1665, 944)
Top recommendations for you:
----------------------------
Predicting rating 5.0 for movie Santa with Muscles
Predicting rating 5.0 for movie Great Day in Harlem A
Predicting rating 5.0 for movie Saint of Fort Washington
Predicting rating 5.0 for movie They Made Me a Criminal
Predicting rating 5.0 for movie Prefontaine
Predicting rating 5.0 for movie Star Kid
Predicting rating 5.0 for movie Marlene Dietrich: Shadow and Light
Predicting rating 5.0 for movie Someone Else's America
Predicting rating 5.0 for movie Entertaining Angels: The Dorothy Day Story
Predicting rating 4.7 for movie Star Wars
Predicting rating 4.6 for movie Pather Panchali
Predicting rating 4.6 for movie Shawshank Redemption
Predicting rating 4.5 for movie Raiders of the Lost Ark
Predicting rating 4.5 for movie Anna
Predicting rating 4.5 for movie Maya Lin: A Strong Clear Vision
Predicting rating 4.5 for movie Some Mother's Son
Predicting ra