In [1]:
"""
Triplet loss network example for recommenders
"""

from __future__ import print_function

import numpy as np
import pandas as pd
import scipy.sparse as sp
from keras import backend as K
from keras.models import Model
from keras.layers import Embedding, Flatten, Input, merge
from keras.optimizers import Adam


def identity_loss(y_true, y_pred):
    """Return the mean of ``y_pred``, ignoring the target values.

    ``y_true`` is multiplied by zero before being subtracted, so it stays
    part of the expression but contributes nothing to the result.
    """
    zeroed_targets = 0 * y_true
    return K.mean(y_pred - zeroed_targets)


def bpr_triplet_loss(X):
    """Compute ``1 - sigmoid(score_pos - score_neg)`` for a triplet.

    ``X`` unpacks, in order, into the positive item latent vector, the
    negative item latent vector and the user latent vector. Each score is
    the sum over the last axis of the element-wise product of the user
    vector with the item vector (that axis is kept with size 1).
    """
    positive_item_latent, negative_item_latent, user_latent = X

    positive_score = K.sum(
        user_latent * positive_item_latent, axis=-1, keepdims=True)
    negative_score = K.sum(
        user_latent * negative_item_latent, axis=-1, keepdims=True)

    # The loss shrinks as the positive item outscores the negative one.
    return 1.0 - K.sigmoid(positive_score - negative_score)


def build_model(num_users, num_items, latent_dim):
    """Build and compile the triplet model.

    The model takes three single-id inputs, named 'positive_item_input',
    'negative_item_input' and 'user_input'. Both item inputs go through the
    same 'item_embedding' layer; the user input goes through its own
    'user_embedding' layer. The three flattened embeddings are merged with
    ``bpr_triplet_loss`` into a single output named 'loss', and the model is
    compiled with ``identity_loss`` and a default ``Adam`` optimizer.

    num_users  -- first argument (input dimension) of the user embedding
    num_items  -- first argument (input dimension) of the item embedding
    latent_dim -- output dimension of both embeddings

    Returns the compiled model.
    """
    pos_in = Input((1, ), name='positive_item_input')
    neg_in = Input((1, ), name='negative_item_input')

    # One embedding table serves both the positive and the negative item.
    shared_item_embedding = Embedding(
        num_items, latent_dim, name='item_embedding', input_length=1)

    uid_in = Input((1, ), name='user_input')

    pos_vec = Flatten()(shared_item_embedding(pos_in))
    neg_vec = Flatten()(shared_item_embedding(neg_in))

    user_embedding_layer = Embedding(
        num_users, latent_dim, name='user_embedding', input_length=1)
    user_vec = Flatten()(user_embedding_layer(uid_in))

    # The merge layer evaluates the triplet loss on the three vectors.
    triplet_loss = merge(
        [pos_vec, neg_vec, user_vec],
        mode=bpr_triplet_loss,
        name='loss',
        output_shape=(1, ))

    triplet_model = Model(
        input=[pos_in, neg_in, uid_in],
        output=triplet_loss)
    triplet_model.compile(loss=identity_loss, optimizer=Adam())

    return triplet_model

Using Theano backend.


In [23]:
# Both check-in files are tab-separated with no header row.
separater = '\t'
names = ['uid', 'vid', 'hour', 'day', 'time']

# --- Training check-ins ---
filename = 'gowallaTrain'
df = pd.read_csv(filename, sep=separater, names=names)

# Largest user id in the training file.
# NOTE(review): later cells use uNum as a user *count* (range(uNum), matrix
# height, embedding size). If user ids start at 0 this is one short --
# confirm the id base before changing it.
uNum = df.uid.max()

# Later cells use vNum as the item-embedding size and the sparse-matrix
# width, and index columns directly by vid. The largest vid must therefore
# be a valid index, so the size is max id + 1 rather than max id.
vNum = int(df.vid.max()) + 1

# One DataFrame per user, holding that user's training rows.
gb = df.groupby('uid')
train = [gb.get_group(x) for x in gb.groups]

# --- Test check-ins (same layout) ---
filename = 'gowallaTest'
df = pd.read_csv(filename, sep=separater, names=names)
gb = df.groupby('uid')
test = [gb.get_group(x) for x in gb.groups]


54315
[   33    32     6 ..., 52850 55680 56753]


In [None]:
def get_triplets(mat):
    """Return (row indices, column indices, random column indices) for ``mat``.

    ``mat`` is converted to COO format once. The first two arrays are the
    row and column indices of its stored entries. The third array has the
    same length and holds integers drawn with ``np.random.randint`` from
    ``[0, mat.shape[1])``; the draws are not checked against the stored
    entries, so a sampled column may coincide with a stored one.
    """
    # Convert a single time instead of once per returned array.
    coo = mat.tocoo()
    negatives = np.random.randint(mat.shape[1], size=len(coo.row))
    return coo.row, coo.col, negatives

# Binary interaction matrices of shape (uNum, vNum): entry [user, vid] is set
# to 1 for every check-in assigned to that split.
# Requires `import scipy.sparse as sp` in the notebook's import cell.
train = sp.lil_matrix((uNum, vNum), dtype=np.int32)
test = sp.lil_matrix((uNum, vNum), dtype=np.int32)

# NOTE(review): `userCheckins` is not defined in any visible cell. This loop
# indexes it by position and reads a `.vid` column from each element, so it
# presumably is a sequence of per-user DataFrames -- confirm and define it
# before this cell. Also note that `train`/`test` rebind the names used for
# the per-user lists in the loading cell.
for user in range(uNum):
    checkins = userCheckins[user]

    # First 80% of the user's rows go to train, the remainder to test.
    split_index = int(len(checkins) * 0.8)
    train_ = checkins[:split_index]
    test_ = checkins[split_index:]

    for matrix, part in ((train, train_), (test, test_)):
        for vid in part.vid.values:
            matrix[user, vid] = 1
    

In [None]:
# Size of the user/item latent vectors and the number of epochs for training.
latent_dim = 10
num_epochs = 10

model = build_model(uNum, vNum, latent_dim)

# Print the model structure. summary() writes the table itself and returns
# None, so wrapping it in print() would emit an extra "None" line.
model.summary()