In [6]:
from jupyterthemes import get_themes
from jupyterthemes.stylefx import set_nb_theme
# fetch whatever get_themes() reports and apply the entry at index 1 to this notebook;
# set_nb_theme's return value is the cell's last expression, so it is what gets displayed
themes = get_themes()
set_nb_theme(themes[1])

In [1]:
import tensorflow as tf

In [2]:
import tensorflow as tf
import numpy as np

def body(x):
    """
    Loop body for ``tf.while_loop``.

    Adds a freshly drawn random int32 2x2 matrix (values below 100) plus the
    constant [[1, 2], [3, 4]] to ``x`` and passes the sum through relu.
    """
    noise = tf.random_uniform(shape=[2, 2], dtype=tf.int32, maxval=100)
    offset = tf.constant(np.array([[1, 2], [3, 4]]), dtype=tf.int32)
    increment = noise + offset
    updated = x + increment
    return tf.nn.relu(updated)

def condition(x):
    """Loop predicate: true while the sum of every entry of ``x`` is below 100."""
    total = tf.reduce_sum(x)
    return total < 100

# the loop state starts out as a 2x2 matrix of zeros
x = tf.Variable(tf.constant(0, shape=[2, 2]))

# wire up the loop first: keep applying `body` for as long as `condition` holds
result = tf.while_loop(cond=condition, body=body, loop_vars=[x])
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    print(sess.run(result))

[[69 47]
 [47 27]]


In [5]:
# IPython `??` introspection of streaming_auc; exploratory only, not valid plain Python
??tf.contrib.metrics.streaming_auc

In [2]:
# creates an InteractiveSession; the object is not bound to a name here,
# so later cells have no handle with which to close it explicitly
tf.InteractiveSession()

<tensorflow.python.client.session.InteractiveSession at 0x10ff79e48>

In [6]:
# slicing a tensor gives back another symbolic tensor (shape (1,) here), not concrete values
tf.zeros(5)[2:3]

<tf.Tensor 'strided_slice_1:0' shape=(1,) dtype=float32>

In [None]:

import sys
from itertools import islice

import numpy as np
import tensorflow as tf
from sklearn.base import BaseEstimator
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize
from tqdm import trange

class BPR(BaseEstimator):
    """
    Bayesian Personalized Ranking (BPR) for implicit feedback data
    using tensorflow as backend

    Inherit scikit-learn's BaseEstimator to get some free stuff
    - class representations, more informative when printing class
    - get, set parameters (it will raise ValueError if we pass in
      illegal parameters, while setattr will simply let it through)

    TODO : remove the scikit-learn dependency

    Parameters
    ----------
    learning_rate : float, default 0.01
        learning rate for the Adam optimizer

    n_factors : int, default 15
        Number/dimension of user and item latent factors

    n_iters : int, default 10
        Number of iterations to train the algorithm

    n_batch_size : int, default 2000
        batch size for batch gradient descent, the original paper
        uses stochastic gradient descent (i.e., batch size of 1),
        but this can make the training unstable (very sensitive to
        learning rate); it is lowered automatically (with a warning)
        when the data holds fewer interactions than this

    reg : float, default 0.01
        Regularization term for the user and item latent factors

    seed : int, default 1234
        Seed for the randomly initialized user, item latent factors

    verbose : boolean, default True
        Whether to print progress bar while training

    Attributes
    ----------
    user_factors : 2d numpy array [n_users, n_factors]
        User latent factors learnt

    item_factors : 2d numpy array [n_items, n_factors]
        Item latent factors learnt

    item_bias : 1d numpy array [n_items]
        bias term for the items

    history : list
        Loss function's history, can be used to evaluate
        whether the algorithm converged or not

    Reference
    ---------
    S. Rendle, C. Freudenthaler, Z. Gantner, L. Schmidt-Thieme
    Bayesian Personalized Ranking from Implicit Feedback
    - https://arxiv.org/pdf/1205.2618.pdf
    """
    def __init__(self, learning_rate = 0.01, n_factors = 15, n_iters = 10,
                 n_batch_size = 2000, reg = 0.01, seed = 1234, verbose = True):
        self.reg = reg
        self.seed = seed
        self.verbose = verbose
        self.n_iters = n_iters
        self.n_factors = n_factors
        self.n_batch_size = n_batch_size
        self.learning_rate = learning_rate

    def fit(self, ratings):
        """
        Train the model on the observed interactions.

        Parameters
        ----------
        ratings : scipy sparse csr_matrix [n_users, n_items]
            sparse matrix of user-item interactions

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``ratings`` holds no interaction at all, or if
            ``n_batch_size`` is smaller than 1
        """
        if ratings.nnz == 0:
            raise ValueError('ratings must contain at least one interaction')

        if self.n_batch_size < 1:
            raise ValueError('n_batch_size must be at least 1, got {}'.format(self.n_batch_size))

        # history stores the cost, allows assessing convergence
        self.history = []

        # a dense prediction cached by an earlier fit is stale from here on
        self._predicted = False

        indptr = ratings.indptr
        indices = ratings.indices
        n_users, n_items = ratings.shape

        # ensure batch size makes sense
        batch_size = self.n_batch_size
        if ratings.nnz < batch_size:
            batch_size = ratings.nnz
            sys.stderr.write('WARNING: Batch size is greater than number of training interactions, '
                             'switching to a batch size of {}\n'.format(ratings.nnz))

        batch_iters = ratings.nnz // batch_size

        # progress bar for training iteration if verbose is turned on
        loop = range(self.n_iters)
        if self.verbose:
            loop = trange(self.n_iters, desc = self.__class__.__name__)

        # build into a private graph so that calling fit repeatedly does not
        # keep piling variables and ops onto the process-wide default graph
        graph = tf.Graph()
        with graph.as_default():
            self._build_graph(n_users, n_items)
            init = tf.global_variables_initializer()

        with tf.Session(graph = graph) as sess:
            sess.run(init)

            for _ in loop:
                iteration_cost = 0.0
                for _ in range(batch_iters):
                    sampled = self._sample(n_users, n_items, indices, indptr, batch_size)
                    sampled_users, sampled_pos_items, sampled_neg_items = sampled
                    feed_dict = {self._slice_u: sampled_users,
                                 self._slice_i: sampled_pos_items,
                                 self._slice_j: sampled_neg_items}
                    _, cost = sess.run([self._train_step, self._total_cost], feed_dict)

                    # normalize by the batch size that was actually fed
                    iteration_cost += cost / batch_size

                iteration_cost /= batch_iters
                self.history.append(iteration_cost)

            # swap the tensorflow variables for their learnt numpy values
            self.user_factors, self.item_factors, self.item_bias = sess.run(
                [self.user_factors, self.item_factors, self.item_bias])

        return self

    def _sample(self, n_users, n_items, indices, indptr, batch_size = None):
        """
        sample batches of random triplets u, i, j

        Parameters
        ----------
        n_users, n_items : int
            shape of the interaction matrix

        indices, indptr : 1d numpy array
            the csr matrix's column indices and row pointers

        batch_size : int, default None
            number of triplets to draw, None falls back to n_batch_size

        Returns
        -------
        sampled_users, sampled_pos_items, sampled_neg_items : 1d int32 numpy array
            user u, an item i the user interacted with and
            an item j the user did not interact with

        Raises
        ------
        ValueError
            If no user has both a positive and a negative item to draw
        """
        if batch_size is None:
            batch_size = self.n_batch_size

        # only draw users that have at least one positive item and at least
        # one item left over to act as the negative; an empty row would make
        # np.random.choice raise and a full row would never leave the
        # rejection loop below (assumes rows hold no duplicated column index)
        n_liked = np.diff(indptr)
        candidates = np.flatnonzero((n_liked > 0) & (n_liked < n_items))
        if candidates.size == 0:
            raise ValueError('no user has both an interacted and a non-interacted item to sample')

        sampled_pos_items = np.zeros(batch_size, dtype = np.int32)
        sampled_neg_items = np.zeros(batch_size, dtype = np.int32)
        sampled_users = np.random.choice(candidates, size = batch_size).astype(np.int32)

        for idx, user in enumerate(sampled_users):
            pos_items = indices[ indptr[user]:indptr[user + 1] ]
            pos_item = np.random.choice(pos_items)

            # rejection sampling: redraw until the item is not a positive one
            neg_item = np.random.choice(n_items)
            while neg_item in pos_items:
                neg_item = np.random.choice(n_items)

            sampled_pos_items[idx] = pos_item
            sampled_neg_items[idx] = neg_item

        return sampled_users, sampled_pos_items, sampled_neg_items

    def _build_graph(self, n_users, n_items):
        """build the tensorflow computational graph"""
        # initialize random weights
        user_init = tf.truncated_normal((n_users, self.n_factors), seed = self.seed)
        item_init = tf.truncated_normal((n_items, self.n_factors), seed = self.seed)
        self.user_factors = tf.Variable(user_init, name = 'user_factors')
        self.item_factors = tf.Variable(item_init, name = 'item_factors')
        self.item_bias = tf.Variable(tf.zeros(n_items), name = 'item_bias')

        # the input data is the sampled batch of users and
        # its corresponding positive and negative items; the length
        # is left open so a reduced batch size can be fed as well
        self._slice_u = tf.placeholder(tf.int32, [None])
        self._slice_i = tf.placeholder(tf.int32, [None])
        self._slice_j = tf.placeholder(tf.int32, [None])

        # use tf.gather() to select a non-contiguous slice from the tensor
        # http://stackoverflow.com/questions/35146444/tensorflow-python-accessing-individual-elements-in-a-tensor
        user_u = tf.gather(self.user_factors, self._slice_u)
        item_i = tf.gather(self.item_factors, self._slice_i)
        item_j = tf.gather(self.item_factors, self._slice_j)
        bias_i = tf.gather(self.item_bias, self._slice_i)
        bias_j = tf.gather(self.item_bias, self._slice_j)

        # decompose the estimator, compute the difference between
        # the score of the positive items i and negative items j;
        # a row-wise dot product gives the per-sample score directly,
        # without materializing a [batch, batch] matrix to read its diagonal
        x_ui = tf.reduce_sum(user_u * item_i, axis = 1)
        x_uj = tf.reduce_sum(user_u * item_j, axis = 1)
        x_uij = bias_i - bias_j + x_ui - x_uj

        # minimize the cost
        # NOTE(review): the bias penalties are not scaled by reg (weight 1),
        # kept as written -- confirm whether that is intended
        cost_u = self.reg * tf.reduce_sum(user_u ** 2)
        cost_i = self.reg * tf.reduce_sum(item_i ** 2) + tf.reduce_sum(bias_i ** 2)
        cost_j = self.reg * tf.reduce_sum(item_j ** 2) + tf.reduce_sum(bias_j ** 2)

        # log(sigmoid(x)) == -softplus(-x); the softplus form does not
        # underflow to log(0) = -inf when x is a large negative number
        cost_uij = tf.reduce_sum( -tf.nn.softplus(-x_uij) )
        self._total_cost = cost_u + cost_i + cost_j - cost_uij
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self._train_step = optimizer.minimize(self._total_cost)
        return self

    def predict(self):
        """
        Obtain the predicted ratings for every users and items
        by doing a dot product of the learnt user and item vectors.
        The result will be cached to avoid re-computing
        it every time we call predict, thus there will
        only be an overhead the first time we call it.
        Note, ideally you probably don't need to compute
        this as it returns a dense matrix and may take
        up huge amounts of memory for large datasets
        """
        # getattr guards against the flag not having been set yet
        if not getattr(self, '_predicted', False):
            self._get_prediction()
            self._predicted = True

        return self._pred

    def _get_prediction(self):
        """Predicted ratings (dot product of user and item vectors)"""
        self._pred = self.user_factors.dot(self.item_factors.T) + self.item_bias
        return self

    def _predict_user(self, user):
        """
        returns the predicted ratings for the specified user,
        this is mainly used in computing evaluation metric,
        where we avoid computing the whole predicted rating matrix

        TODO : do we even need this in the class?
        """
        user_pred = self.user_factors[user].dot(self.item_factors.T) + self.item_bias
        return user_pred

    def recommend(self, ratings, N = 5, user_ids = None):
        """
        Returns the top N ranked items for given user id,
        excluding the ones that the user already liked

        Parameters
        ----------
        ratings : scipy sparse csr_matrix [n_users, n_items]
            sparse matrix of user-item interactions

        N : int, default 5
            top-N similar items' N

        user_ids : 1d iterator, e.g. list or numpy array, default None
            users' id that we wish to find top-N recommended for,
            default None will find it for all users

        Returns
        -------
        recommendation : 2d numpy array [number of query users_ids, N]
            each row is the top-N ranked item for each query user
        """
        if user_ids is not None:
            recommendation = np.zeros((len(user_ids), N), dtype = np.int32)
            for idx, user in enumerate(user_ids):
                top_n = self._recommend_user(ratings, user, N)
                recommendation[idx] = top_n
        else:
            n_users = ratings.shape[0]
            recommendation = np.zeros((n_users, N), dtype = np.int32)
            for user in range(n_users):
                top_n = self._recommend_user(ratings, user, N)
                recommendation[user] = top_n

        return recommendation

    def _recommend_user(self, ratings, user, N):
        """the top-N ranked items for a given user"""
        scores = self._predict_user(user)

        # compute the top N items, removing the items that the user already liked
        # from the result and ensure that we don't get out of bounds error when
        # we ask for more recommendations than that are available
        liked = set(ratings[user].indices)
        count = N + len(liked)
        if count < scores.shape[0]:

            # when trying to obtain the top-N indices from the score,
            # using argpartition to retrieve the top-N indices in
            # unsorted order and then sort them will be faster than doing
            # straight up argsort on the entire score
            # http://stackoverflow.com/questions/42184499/cannot-understand-numpy-argpartition-output
            ids = np.argpartition(scores, -count)[-count:]
            best_ids = np.argsort(scores[ids])[::-1]
            best = ids[best_ids]
        else:
            best = np.argsort(scores)[::-1]

        top_n = list( islice((rec for rec in best if rec not in liked), N) )
        return top_n

    def get_similar_items(self, N = 5, item_ids = None):
        """
        return the top N similar items for itemid, where
        cosine distance is used as the distance metric

        Parameters
        ----------
        N : int, default 5
            top-N similar items' N

        item_ids : 1d iterator, e.g. list or numpy array, default None
            the item ids that we wish to find the similar items
            of, the default None will compute the similar items
            for all the items

        Returns
        -------
        similar_items : 2d numpy array [number of query item_ids, N]
            each row is the top-N most similar item id for each
            query item id
        """
        # cosine distance is proportional to normalized euclidean distance,
        # thus we normalize the item vectors and use euclidean metric so
        # we can use the more efficient kd-tree for nearest neighbor search;
        # also the item will always be nearest to itself, so we add 1 to
        # get an additional nearest item and remove itself at the end
        normed_item_factors = normalize(self.item_factors)
        knn = NearestNeighbors(n_neighbors = N + 1, metric = 'euclidean')
        knn.fit(normed_item_factors)

        # returns a distance, index tuple,
        # we don't actually need the distance
        if item_ids is not None:
            normed_item_factors = normed_item_factors[item_ids]

        _, items = knn.kneighbors(normed_item_factors)
        similar_items = items[:, 1:].astype(np.int32)
        return similar_items

Contact GitHub API Training Shop Blog About
© 2017 GitHub, Inc. Terms Privacy Security Status Help

In [None]:
# integer index placeholders, one entry per sampled triplet
u, i, j = (
    tf.placeholder(tf.int32, [None]),
    tf.placeholder(tf.int32, [None]),
    tf.placeholder(tf.int32, [None]),
)

# float placeholders with 4096 columns per row
# TODO: change visual feature dimension to be more flexible
iv, jv = (
    tf.placeholder(tf.float32, [None, 4096]),
    tf.placeholder(tf.float32, [None, 4096]),
)

In [10]:
import numpy as np
# b is a 2x3 matrix; a is the 3x2 transpose of a 2x3 literal
b = np.array([[3, 4, 1], [4, 2, 2]])
a = np.transpose(np.array([[1, 0, 2], [0, 1, 3]]))
a

array([[1, 0],
       [0, 1],
       [2, 3]])

In [11]:
# matrix product of the 3x2 `a` with the 2x3 `b`, giving a 3x3 result
a.dot(b)

array([[ 3,  4,  1],
       [ 4,  2,  2],
       [18, 14,  8]])

In [None]:
# draft notes: the extra attributes the image-aware _build_graph below reads from `self`;
# `self` is not defined at cell level, so this cell is not runnable as written
# TODO: move these into __init__ as proper hyper-parameters
self.n_img_features = 4096
self.n_img_factors
self.reg_img

def _build_graph(self, n_users, n_items):
    """
    build the tensorflow computational graph for the image-aware BPR variant

    On top of the plain BPR score (item bias plus the dot product of the
    user and item latent factors) every item contributes a visual score:
    its image features are projected with ``img_embedding`` and matched
    against a per-user visual factor, plus a visual bias obtained as the
    dot product of ``img_bias`` with the item's image features.

    Reads ``n_factors``, ``n_img_features``, ``n_img_factors``, ``reg``,
    ``reg_img``, ``seed`` and ``learning_rate`` from ``self`` and stores the
    variables, the placeholders, ``_total_cost`` and ``_train_step`` on it.

    Parameters
    ----------
    n_users : int
        number of rows of the user factor matrices

    n_items : int
        number of rows of the item factor matrix / length of the item bias

    Returns
    -------
    self
    """
    # initialize random weights
    self.user_factors = tf.Variable(
        tf.truncated_normal((n_users, self.n_factors), seed = self.seed),
        name = 'user_factors')
    self.item_factors = tf.Variable(
        tf.truncated_normal((n_items, self.n_factors), seed = self.seed),
        name = 'item_factors')
    self.user_img_factors = tf.Variable(
        tf.truncated_normal((n_users, self.n_img_factors), seed = self.seed),
        name = 'user_img_factors')

    # maps a raw image feature vector to n_img_factors dimensions,
    # hence the [n_img_features, n_img_factors] shape
    self.img_embedding = tf.Variable(
        tf.truncated_normal((self.n_img_features, self.n_img_factors), seed = self.seed),
        name = 'img_embedding')

    # one weight per raw image feature (not per item)
    self.img_bias = tf.Variable(tf.zeros(self.n_img_features), name = 'img_bias')
    self.item_bias = tf.Variable(tf.zeros(n_items), name = 'item_bias')

    # the input data is the sampled batch of users index,
    # its corresponding positive and negative items' index and
    # image features of the positive and negative items; the batch
    # dimension is left open for all of them so their lengths always agree
    self._slice_u = tf.placeholder(tf.int32, [None])
    self._slice_i = tf.placeholder(tf.int32, [None])
    self._slice_j = tf.placeholder(tf.int32, [None])
    self._img_i = tf.placeholder(tf.float32, [None, self.n_img_features])
    self._img_j = tf.placeholder(tf.float32, [None, self.n_img_features])

    # use tf.gather() to select a non-contiguous slice from the tensor
    # http://stackoverflow.com/questions/35146444/tensorflow-python-accessing-individual-elements-in-a-tensor
    img_u  = tf.gather(self.user_img_factors, self._slice_u)
    user_u = tf.gather(self.user_factors, self._slice_u)
    item_i = tf.gather(self.item_factors, self._slice_i)
    item_j = tf.gather(self.item_factors, self._slice_j)
    bias_i = tf.gather(self.item_bias, self._slice_i)
    bias_j = tf.gather(self.item_bias, self._slice_j)

    # decompose the estimator, compute the difference between
    # the score of the positive items i and negative items j;
    # row-wise dot products give the per-sample scores directly,
    # without building a [batch, batch] matrix to read its diagonal
    proj_i = tf.matmul(self._img_i, self.img_embedding)
    proj_j = tf.matmul(self._img_j, self.img_embedding)
    img_ui = tf.reduce_sum(img_u * proj_i, axis = 1)
    img_uj = tf.reduce_sum(img_u * proj_j, axis = 1)
    x_ui = tf.reduce_sum(user_u * item_i, axis = 1)
    x_uj = tf.reduce_sum(user_u * item_j, axis = 1)

    # img_bias has one entry per image feature, so it is applied to the
    # features themselves (broadcast over the batch) rather than being
    # indexed by item id
    img_bias_i = tf.reduce_sum(self._img_i * self.img_bias, axis = 1)
    img_bias_j = tf.reduce_sum(self._img_j * self.img_bias, axis = 1)
    x_uij = bias_i - bias_j + x_ui - x_uj + img_ui - img_uj + img_bias_i - img_bias_j

    # minimize the cost
    # NOTE(review): the bias penalties carry weight 1 (not reg / reg_img) and the
    # visual penalty is on the per-sample bias score, kept as drafted -- confirm
    cost_u = self.reg * tf.reduce_sum(user_u ** 2) + self.reg_img * tf.reduce_sum(img_u ** 2)
    cost_i = self.reg * tf.reduce_sum(item_i ** 2) + tf.reduce_sum(bias_i ** 2) + tf.reduce_sum(img_bias_i ** 2)
    cost_j = self.reg * tf.reduce_sum(item_j ** 2) + tf.reduce_sum(bias_j ** 2) + tf.reduce_sum(img_bias_j ** 2)
    cost_embedding = self.reg_img * tf.reduce_sum(self.img_embedding ** 2)

    # log(sigmoid(x)) == -softplus(-x); the softplus form does not
    # underflow to log(0) = -inf when x is a large negative number
    cost_uij = tf.reduce_sum( -tf.nn.softplus(-x_uij) )
    self._total_cost = cost_u + cost_i + cost_j + cost_embedding - cost_uij
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self._train_step = optimizer.minimize(self._total_cost)
    return self

In [None]:
def vbpr(user_count, item_count, hidden_dim=20, hidden_img_dim=128,
         learning_rate = 0.001,
         l2_regulization = 0.01,
         bias_regulization=1.0,
         img_feature_dim=4096,
         device="/gpu:1"):
    """
    Build the tensorflow graph of a visual BPR model.

    Parameters
    ----------
    user_count : int
        total number of users
    item_count : int
        total number of items
    hidden_dim : int, default 20
        hidden feature size of MF
    hidden_img_dim : int, default 128
        size the image features are projected to, the projection
        matrix has shape [img_feature_dim, hidden_img_dim]
    learning_rate : float, default 0.001
        step size of the gradient descent optimizer
    l2_regulization : float, default 0.01
        weight of the squared-norm penalty on the embedding terms
    bias_regulization : float, default 1.0
        weight of the squared-norm penalty on the item bias terms
    img_feature_dim : int, default 4096
        number of columns of the image feature inputs
    device : str or None, default "/gpu:1"
        device string handed to tf.device for the variables and the loss;
        None hands tf.device no device constraint

    Returns
    -------
    u, i, j : int32 placeholders
        user, positive item and negative item indices
    iv, jv : float32 placeholders [None, img_feature_dim]
        image features of the positive and negative items
    loss, auc : tensors
        training loss and fraction of pairs ranked correctly
    train_op : operation
        one gradient descent step on loss
    """
    u = tf.placeholder(tf.int32, [None])
    i = tf.placeholder(tf.int32, [None])
    j = tf.placeholder(tf.int32, [None])

    # image features
    iv = tf.placeholder(tf.float32, [None, img_feature_dim])
    jv = tf.placeholder(tf.float32, [None, img_feature_dim])

    with tf.device(device):
        user_emb_w = tf.get_variable("user_emb_w", [user_count+1, hidden_dim],
                                     initializer=tf.random_normal_initializer(0, 0.1))
        item_emb_w = tf.get_variable("item_emb_w", [item_count+1, hidden_dim],
                                     initializer=tf.random_normal_initializer(0, 0.1))
        item_b = tf.get_variable("item_b", [item_count+1, 1],
                                 initializer=tf.constant_initializer(0.0))
        # user image
        user_img_w = tf.get_variable("user_img_w", [user_count+1, hidden_img_dim],
                                     initializer=tf.random_normal_initializer(0, 0.1))
        # TODO: change to gather
        u_emb = tf.nn.embedding_lookup(user_emb_w, u)
        u_img = tf.nn.embedding_lookup(user_img_w, u)

        i_emb = tf.nn.embedding_lookup(item_emb_w, i)
        i_b = tf.nn.embedding_lookup(item_b, i)
        j_emb = tf.nn.embedding_lookup(item_emb_w, j)
        j_b = tf.nn.embedding_lookup(item_b, j)

        img_emb_w = tf.get_variable("image_embedding_weights", [img_feature_dim, hidden_img_dim],
                                    initializer=tf.random_normal_initializer(0, 0.1))

        img_i_j = tf.matmul(iv - jv, img_emb_w)

        # MF predict: u_i > u_j
        # (the * operator replaces tf.mul, which no longer exists from TF 1.0 on)
        x = i_b - j_b + tf.reduce_sum(u_emb * (i_emb - j_emb), 1, keep_dims=True) + \
            tf.reduce_sum(u_img * img_i_j, 1, keep_dims=True)

        # auc score is used in test/cv
        # reduce_mean is reasonable BECAUSE
        # all test (i, j) pairs of one user is in ONE batch
        auc = tf.reduce_mean(tf.to_float(x > 0))

        # l2_regulization weighs the embedding terms (it was accepted
        # but never applied before), bias_regulization the bias terms
        l2_norm = tf.add_n([
            l2_regulization * tf.reduce_sum(u_emb * u_emb),
            l2_regulization * tf.reduce_sum(u_img * u_img),
            l2_regulization * tf.reduce_sum(i_emb * i_emb),
            l2_regulization * tf.reduce_sum(j_emb * j_emb),
            l2_regulization * tf.reduce_sum(img_emb_w * img_emb_w),
            bias_regulization * tf.reduce_sum(i_b * i_b),
            bias_regulization * tf.reduce_sum(j_b * j_b)
        ])

        # log(sigmoid(x)) == -softplus(-x); the softplus form does not
        # underflow to log(0) = -inf when x is a large negative number
        log_sigmoid = -tf.nn.softplus(-x)
        loss = l2_norm - tf.reduce_mean(log_sigmoid)

    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    return u, i, j, iv, jv, loss, auc, train_op