In [None]:
import scipy.io
import tensorflow as tf
import numpy as np

In [None]:
# Load the ratings data set. 'R' is the rated / not-rated indicator matrix and
# 'Y' holds the ratings themselves (both are indexed [movie, user] downstream).
movies = scipy.io.loadmat('ex8_movies.mat')
R_train, Y_train = movies['R'], movies['Y']

print('R shape is: {0}'.format(R_train.shape))
print('Y shape is: {0}'.format(Y_train.shape))

In [None]:
def create_placeholders(num_movies, num_users):
    """
    Build the two float32 input placeholders fed at session run time.

    Arguments:
    num_movies -- scalar, number of movies (rows)
    num_users -- scalar, number of users (columns)

    Returns:
    R -- placeholder named 'R' for the binary-valued indicator matrix,
         shape [num_movies, num_users]
    Y -- placeholder named 'Y' for the ratings, shape [num_movies, num_users]
    """
    # Both inputs share one layout: one row per movie, one column per user.
    shape = [num_movies, num_users]
    R = tf.placeholder(dtype=tf.float32, shape=shape, name='R')
    Y = tf.placeholder(dtype=tf.float32, shape=shape, name='Y')
    return R, Y

def initialize_parameters(num_movies, num_users, num_features):
    """
    Creates (or reuses) the trainable factor matrices of the collaborative
    filtering model. The shapes are:
                        X : [num_movies, num_features]
                        Theta : [num_users, num_features]

    Arguments:
    num_movies -- scalar, number of movies
    num_users -- scalar, number of users
    num_features -- scalar, number of latent features per movie / user

    Returns:
    parameters -- a dictionary of tensors containing X, Theta
    """

    # AUTO_REUSE hands back an existing variable when the same name is asked
    # for again, so each name has to encode the full shape of its variable.
    with tf.variable_scope("cofi-model", reuse=tf.AUTO_REUSE):
        X = tf.get_variable(
            'X-{0}-{1}'.format(num_movies, num_features),
            [num_movies, num_features],
            initializer=tf.contrib.layers.xavier_initializer(seed=0))
        # Theta has one row per *user*, so its name is keyed on num_users.
        # Keying it on num_movies made two calls that differ only in
        # num_users request the same name with different shapes.
        Theta = tf.get_variable(
            'Theta-{0}-{1}'.format(num_users, num_features),
            [num_users, num_features],
            initializer=tf.contrib.layers.xavier_initializer(seed=0))

        parameters = {"X": X,
                      "Theta": Theta}

    return parameters

def compute_cost(R, Y, parameters):
    """
    Unregularised squared-error cost over the rated entries only.

    Arguments:
    R -- indicator tensor; an entry counts as rated when it equals 1
    Y -- ratings tensor, same shape as R
    parameters -- dictionary holding the tensors 'X' and 'Theta'

    Returns:
    Scalar tensor: half the sum of squared differences between
    X * Theta^T and Y, taken where R == 1.
    """
    X, Theta = parameters['X'], parameters['Theta']

    # Predicted rating for every (movie, user) pair, and its error against Y.
    predicted = tf.matmul(X, tf.transpose(Theta))
    errors = tf.subtract(predicted, Y)

    # Keep only the errors of entries that were actually rated.
    rated = tf.equal(R, tf.ones_like(R))
    squared_errors = tf.square(tf.boolean_mask(errors, rated))

    return tf.reduce_sum(squared_errors) / 2.

def model(R_train, Y_train, num_features=10, num_iter=1000, learning_rate=0.0001, print_cost=True):
    """
    Fits the factor matrices X and Theta with plain gradient descent.

    Arguments:
    R_train -- indicator matrix, same shape as Y_train
    Y_train -- ratings matrix of shape (num_movies, num_users)
    num_features -- number of latent features to learn
    num_iter -- number of gradient descent steps
    learning_rate -- step size passed to the optimizer
    print_cost -- when True, print the cost every 100 iterations

    Returns:
    [X, Theta] -- the learned values as evaluated by the session
    """
    assert(R_train.shape == Y_train.shape)
    num_movies, num_users = Y_train.shape

    # Graph construction: inputs, trainable factors, cost and training step.
    R, Y = create_placeholders(num_movies, num_users)
    parameters = initialize_parameters(num_movies, num_users, num_features)
    cost = compute_cost(R, Y, parameters)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # The full data set is fed on every step, so build the feed once.
    feed = {R: R_train, Y: Y_train}

    with tf.Session() as sess:
        sess.run(init)
        for step in range(num_iter):
            _, curr_cost = sess.run([train_step, cost], feed_dict=feed)
            if print_cost and step % 100 == 0:
                print(str(step) + ' iter, cost = ' + str(curr_cost))
        # Evaluate the variables while the session is still open.
        return sess.run([parameters['X'], parameters['Theta']])

def load_movie_list(fname):
    """
    Reads movie titles from a latin-1 encoded text file.

    Each line is expected to look like "<id> <title>": everything up to and
    including the first space is dropped and the remainder is stripped of
    surrounding whitespace. A line without any space yields an empty string.

    Arguments:
    fname -- path of the file to read

    Returns:
    List of titles, one per line, in file order.
    """
    with open(fname, encoding='latin-1') as f:
        # partition(' ')[2] is the text after the first space ('' if none).
        return [line.partition(' ')[2].strip() for line in f]

def normalize_ratings(Y, R):
    """
    Mean-centres each movie's ratings using only the entries that were rated.

    Arguments:
    Y -- 2-D ratings array, one row per movie
    R -- indicator array of the same shape; an entry is rated when it equals 1

    Returns:
    Ymean -- array of shape (num_rows, 1) with the mean of each row's rated
             entries; rows with no rated entry get a mean of 0
    Ynorm -- float array shaped like Y holding Y - Ymean at rated entries
             and 0 everywhere else
    """
    Y = np.asarray(Y, dtype=float)
    rated = np.asarray(R) == 1

    counts = rated.sum(axis=1, keepdims=True)
    totals = np.where(rated, Y, 0.0).sum(axis=1, keepdims=True)

    # Divide only where at least one rating exists. Taking the mean of an
    # empty selection would give NaN, which then propagates into every
    # prediction computed for that row.
    Ymean = np.divide(totals, counts, out=np.zeros_like(totals), where=counts > 0)

    Ynorm = np.where(rated, Y - Ymean, 0.0)
    return Ymean, Ynorm

In [None]:
def check_cost():
    """
    Evaluates compute_cost on a small slice of the data and prints the result.

    Uses the first 5 movies, 4 users and 3 features, with X and Theta
    overwritten by the matching slices of the matrices stored in
    ex8_movieParams.mat. Returns nothing.
    """
    num_movies, num_users, num_features = 5, 4, 3

    R, Y = create_placeholders(num_movies, num_users)
    parameters = initialize_parameters(num_movies, num_users, num_features)

    movie_params = scipy.io.loadmat('ex8_movieParams.mat')
    movies = scipy.io.loadmat('ex8_movies.mat')

    # Top-left corner of each matrix, matching the reduced problem size.
    R_train = movies['R'][:num_movies, :num_users]
    Y_train = movies['Y'][:num_movies, :num_users]
    X_known = movie_params['X'][:num_movies, :num_features]
    Theta_known = movie_params['Theta'][:num_users, :num_features]

    with tf.Session() as sess:
        # Swap each variable for its assign op so that evaluating the cost
        # uses the loaded values instead of the random initialisation.
        parameters['X'] = parameters['X'].assign(X_known)
        parameters['Theta'] = parameters['Theta'].assign(Theta_known)
        cost_op = compute_cost(R, Y, parameters)
        cost = sess.run(cost_op, feed_dict={R: R_train, Y: Y_train})
        print('The cost is: ' + str(cost))


In [None]:
# Sanity check: evaluate compute_cost on a 5-movie x 4-user slice with the
# parameters stored in ex8_movieParams.mat and print the resulting cost.
check_cost()

Expected output: with the parameters loaded from `ex8_movieParams.mat`, the printed cost should be approximately 22.22.

In [None]:
movie_list = load_movie_list('movie_ids.txt')

# Initialize my ratings: 0 means "not rated".
my_ratings = np.zeros((1682, 1))

# Check the file movie_ids.txt for the ID of each movie in our dataset.
# Rows are zero-based while the IDs start at 1, so Toy Story (1995), which
# has ID 1, lives in row 0.
for row, stars in (
    (0, 4.),    # Toy Story (1995): rated "4"
    (97, 2.),   # Silence of the Lambs (1991): did not enjoy it
    # A few more movies we liked / did not like:
    (6, 3.),
    (11, 5.),
    (53, 4.),
    (63, 5.),
    (65, 3.),
    (68, 5.),
    (182, 4.),
    (225, 5.),
    (354, 5.),
):
    my_ratings[row] = stars

In [None]:
# Prepend the new user's ratings as column 0. The column is attached to the
# matrices as loaded from ex8_movies.mat (movies['Y'] / movies['R']) rather
# than to Y_train / R_train themselves, so re-running this cell rebuilds the
# same result instead of stacking one more copy of the column on each run.
Y_train = np.concatenate((my_ratings, movies['Y']), axis=1)
R_train = np.concatenate((my_ratings != 0, movies['R']), axis=1)

print('The new R shape is: ' + str(R_train.shape))
print('The new Y shape is: ' + str(Y_train.shape))

In [None]:
# Ymean: per-movie mean of the rated entries; Ynorm: ratings with that mean
# subtracted at rated entries (entries that were not rated stay 0).
Ymean, Ynorm = normalize_ratings(Y_train, R_train)

In [None]:
# Fit the factor matrices on the mean-centred ratings with 100 latent features
# and 100 gradient descent iterations (learning rate left at model()'s default).
X, Theta = model(R_train, Ynorm, num_features=100, num_iter=100)

In [None]:
# Predicted mean-centred rating for every (movie, user) pair.
p = X @ Theta.T

# Column 0 belongs to the user prepended from my_ratings; adding the per-movie
# mean back turns the centred prediction into a rating.
predictions = p[:, 0].reshape(Ymean.shape[0]) + Ymean.reshape(Ymean.shape[0])

# Movie indices ordered from highest to lowest predicted rating.
idx = list(np.argsort(predictions)[::-1])

print('Top recommendations for you:')
for i in range(10):
    j = idx[i]
    print('Predicting rating {0:.1f} for movie {1}'.format(predictions[j], movie_list[j]))

print('Original ratings provided:')
for i, rating in enumerate(my_ratings.reshape(1682)):
    # Only entries with a positive rating were supplied by the user.
    if not rating > 0:
        continue
    print('Rated {0:.1f} for {1}'.format(rating, movie_list[i]))