Data set: movie data provided by Ruslan Salakhutdinov

https://www.cs.toronto.edu/~rsalakhu/BPMF.html

moviedata_url = 'https://www.cs.toronto.edu/~rsalakhu/code_BPMF/moviedata.mat'

In [1]:
import tensorflow as tf
import scipy.io
import numpy as np

# TensorFlow implementation of PMF 

In [2]:
class PMF:
    """Build the graph for Probabilistic Matrix Factorization"""
    
    def __init__(self, num_users, num_items, rank, batch_size,learning_rate,lambda_):
        self.num_users = num_users
        self.num_items = num_items
        self.rank = rank
        self.batch_size = batch_size
        self.lr = learning_rate
        self.lambda_ = lambda_
        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
        
    def _create_placeholders(self):
        """Step1: define place holders for input and output"""
        with tf.name_scope("data"):
            self.X_user = tf.placeholder(tf.int32, shape = [self.batch_size], name = "X_user")
            self.X_item = tf.placeholder(tf.int32, shape = [self.batch_size], name = "X_item")
            self.Y = tf.placeholder(tf.int32, shape = [self.batch_size], name = "Y")
    
    def _create_embedding(self):
        """Step2: define embedding of users and items"""
        with tf.device('/cpu:0'):
            with tf.name_scope("embed"):
                self.U = tf.Variable(tf.random_uniform([self.num_users,self.rank],-1.0,1.0)
                                     ,name = "user_embed")
                self.V = tf.Variable(tf.random_uniform([self.num_items, self.rank],-1.0,1.0)
                                    , name = "item_embed")
            
    
    def _create_loss(self):
        """Step3: define the model and create the loss"""
        with tf.device('/cpu:0'):
            with tf.name_scope("loss"):
                # get innner product of user_embed and item_embed of input data
                self.X_user_embed = tf.nn.embedding_lookup(self.U, self.X_user, name = "X_user_embed")
                self.X_item_embed = tf.nn.embedding_lookup(self.V, self.X_item, name = "X_item_embed")
                self.pred = tf.reduce_sum(self.X_user_embed * self.X_item_embed,1)
                # define loss function under Gaussian assumption
                self.loss = tf.nn.l2_loss(self.pred - tf.to_float(self.Y)) \
                            + self.lambda_ * tf.nn.l2_loss(self.X_user_embed) \
                            + self.lambda_ * tf.nn.l2_loss(self.X_item_embed)
            
    def _create_optimizer(self):
        """ Step 5: define optimizer """
        with tf.device('/cpu:0'):
            self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.loss, var_list = [self.U, self.V], global_step=self.global_step)

    def _create_summaries(self):
        with tf.name_scope("summaries"):
            tf.summary.scalar("loss", self.loss)
            tf.summary.histogram("histogram_loss", self.loss)
            # because you have several summaries, we should merge them all
            # into one op to make it easier to manage
            self.summary_op = tf.summary.merge_all()

    def build_graph(self):
        """ Build the graph for our model """
        self._create_placeholders()
        self._create_embedding()
        self._create_loss()
        self._create_optimizer()
        self._create_summaries()

In [3]:

mat = scipy.io.loadmat('moviedata.mat')['train_vec'] # Y takes value from {1,2,3,4,5}
mat = np.array(mat).astype(np.int32)
train_data = mat[:len(mat) / 5 * 4]
test_data = mat[len(mat) / 5 * 4+1:]

num_users = 3952
num_items = 6040
rank = 10
batch_size = 10000 
learning_rate = .0001
lambda_ = 0.01 #regularization parameter
n_epochs = 100
N = train_data.shape[0] #900000
n_batches = N / batch_size

model_pmf = PMF(num_users, num_items, rank, batch_size, learning_rate, lambda_)
model_pmf.build_graph()

with tf.Session() as sess:
    tf.global_variables_initializer()
    tf.local_variables_initializer()
    for epoch_idx in xrange(n_epochs):
        loss_tracker = 0.
        for batch_idx in xrange(n_batches): 
            X_user_batch = train_data[batch_idx*batch_size:(batch_idx+1)*batch_size,0]
            X_item_batch = train_data[batch_idx*batch_size:(batch_idx+1)*batch_size,1]
            Y_batch = train_data[batch_idx*batch_size:(batch_idx+1)*batch_size,2]
            print X_user_batch.shape
            # perform update
            loss_batch, _, summary = sess.run([model_pmf.loss, model_pmf.optimizer, model_pmf.summary_op], 
                                  feed_dict={model_pmf.X_user: X_user_batch,
                                             model_pmf.X_item: X_item_batch, 
                                             model_pmf.Y: Y_batch})
            loss_tracker += loss_batch
        if (epoch_idx+1) % 10 == 0:
            print "Epoch %d. Obj per batch: %.3f" %(epoch_idx+1, loss_tracker/n_batches)
print "Optimizaiton finished!"

(10000,)


FailedPreconditionError: Attempting to use uninitialized value embed/item_embed
	 [[Node: embed/item_embed/read = Identity[T=DT_FLOAT, _class=["loc:@embed/item_embed"], _device="/job:localhost/replica:0/task:0/cpu:0"](embed/item_embed)]]

Caused by op u'embed/item_embed/read', defined at:
  File "/anaconda/envs/Python27/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/anaconda/envs/Python27/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-3-aa07c864ff23>", line 18, in <module>
    model_pmf.build_graph()
  File "<ipython-input-2-8a8a427e115b>", line 57, in build_graph
    self._create_embedding()
  File "<ipython-input-2-8a8a427e115b>", line 27, in _create_embedding
    , name = "item_embed")
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 197, in __init__
    expected_shape=expected_shape)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 316, in _init_from_args
    self._snapshot = array_ops.identity(self._variable, name="read")
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1338, in identity
    result = _op_def_lib.apply_op("Identity", input=input, name=name)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/anaconda/envs/Python27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value embed/item_embed
	 [[Node: embed/item_embed/read = Identity[T=DT_FLOAT, _class=["loc:@embed/item_embed"], _device="/job:localhost/replica:0/task:0/cpu:0"](embed/item_embed)]]
