In [20]:
"""
Triplet loss network example for recommenders
"""

from __future__ import print_function

import numpy as np
import scipy.sparse as sp
import pandas as pd
from keras import backend as K
from keras.models import Model
from keras.layers import Embedding, Flatten, Input, merge
from keras.optimizers import Adam,SGD
import metrics
from keras.regularizers import l2, activity_l2
import datetime


def identity_loss(y_true, y_pred):
    """Use the model output itself as the loss value.

    Args:
        y_true: target tensor; it is multiplied by zero, so its values do
            not influence the result.
        y_pred: model output tensor.

    Returns:
        The mean of ``y_pred`` (computed as ``mean(y_pred - 0 * y_true)``).
    """
    # Keep y_true inside the expression, but scaled to zero.
    zeroed_targets = 0 * y_true
    return K.mean(y_pred - zeroed_targets)


def bpr_triplet_loss(X):
    """Compute the BPR triplet loss for a batch of (positive, negative, user) latents.

    Args:
        X: sequence of exactly three tensors, unpacked in the order
            ``(positive_item_latent, negative_item_latent, user_latent)``.

    Returns:
        Tensor equal to ``1 - sigmoid(u.p - u.n)``, where each dot product is
        a sum over the last axis with that axis kept (``keepdims=True``).
    """
    positive_item_latent, negative_item_latent, user_latent = X

    # NOTE: an L2 penalty on the latents used to be computed here but was
    # never added to the returned loss, so it has been removed as dead code.
    # The embedding layers created in build_model carry their own
    # W_regularizer.

    # Preference scores of the user for the observed and the sampled item.
    positive_score = K.sum(
        user_latent * positive_item_latent, axis=-1, keepdims=True)
    negative_score = K.sum(
        user_latent * negative_item_latent, axis=-1, keepdims=True)

    # BPR loss: small when the positive item outscores the negative one.
    loss = 1 - K.sigmoid(positive_score - negative_score)

    return loss


def build_model(num_users, num_items, latent_dim):
    """Build and compile the triplet (BPR) recommender network.

    Args:
        num_users: number of rows of the user embedding table.
        num_items: number of rows of the item embedding table.
        latent_dim: width of both embedding tables.

    Returns:
        A compiled ``Model`` with three single-id inputs
        (``positive_item_input``, ``negative_item_input``, ``user_input``)
        whose output is the merged ``bpr_triplet_loss`` value.
    """
    positive_item_input = Input((1, ), name='positive_item_input')
    negative_item_input = Input((1, ), name='negative_item_input')

    # One item table serves both the positive and the negative branch, so
    # the two branches share weights.
    item_embedding_layer = Embedding(
        num_items,
        latent_dim,
        name='item_embedding',
        input_length=1,
        W_regularizer=l2(0.001))

    user_input = Input((1, ), name='user_input')

    # Look up each id, then drop the length-1 sequence axis.
    positive_lookup = item_embedding_layer(positive_item_input)
    positive_item_embedding = Flatten()(positive_lookup)

    negative_lookup = item_embedding_layer(negative_item_input)
    negative_item_embedding = Flatten()(negative_lookup)

    user_embedding_layer = Embedding(
        num_users,
        latent_dim,
        name='user_embedding',
        input_length=1,
        W_regularizer=l2(0.001))
    user_embedding = Flatten()(user_embedding_layer(user_input))

    # The list order must match the unpacking order in bpr_triplet_loss.
    branches = [positive_item_embedding, negative_item_embedding,
                user_embedding]
    loss = merge(branches,
                 mode=bpr_triplet_loss,
                 name='loss',
                 output_shape=(1, ))

    model = Model(
        input=[positive_item_input, negative_item_input, user_input],
        output=loss)

    # The network output is already the loss, so identity_loss just averages
    # it. Plain SGD with a small learning rate is the optimizer in use.
    optimizer = SGD(lr=0.0001)
    model.compile(loss=identity_loss, optimizer=optimizer)

    return model

In [21]:
# Both splits are tab-separated and use the same explicitly named columns.
separater = '\t'
names = ['uid','vid','hour','day','time']

# --- Training split ---
filename = 'gowallaTrain'
df = pd.read_csv(filename, sep=separater, names=names)
# Ids are used directly as matrix indices later on, so each dimension is
# the largest id seen in training plus one.
uNum = df.uid.max()+1
vNum = df.vid.max()+1
gb = df.groupby('uid')
# Iterating the GroupBy yields one frame per user in sorted uid order, in a
# single pass. Looking groups up through `gb.groups` depends on dict
# iteration order and costs one lookup per user.
train = [user_rows for _, user_rows in gb]

# --- Test split ---
filename = 'gowallaTest'
df = pd.read_csv(filename, sep=separater, names=names)
gb = df.groupby('uid')
test = [user_rows for _, user_rows in gb]


In [22]:
def get_triplets(mat):
    """Sample one (user, positive item, negative item) triplet per stored entry.

    Args:
        mat: scipy sparse matrix; it must support ``tocoo()`` and ``shape``.

    Returns:
        Tuple ``(rows, cols, negatives)``: the COO row and column indices of
        the stored entries, plus an equally long array of column indices
        drawn uniformly from ``[0, mat.shape[1])`` with ``np.random.randint``.
        The draw is not checked against the row's stored entries, so a
        sampled "negative" can coincide with a positive.
    """
    # Convert once instead of once per use.
    coo = mat.tocoo()
    negatives = np.random.randint(mat.shape[1], size=len(coo.row))
    return coo.row, coo.col, negatives

# Binary user x venue interaction matrices, sized from the training ids.
sp_train = sp.lil_matrix((uNum, vNum), dtype=np.int32)
sp_test = sp.lil_matrix((uNum, vNum), dtype=np.int32)

# Take the user id from each group's own rows rather than from its position
# in the list: positions only equal ids when every id in 0..uNum-1 is
# present and the list is sorted.
for user_rows in train:
    user = user_rows.uid.iloc[0]
    for vid in user_rows.vid.unique():
        sp_train[user, vid] = 1

for user_rows in test:
    user = user_rows.uid.iloc[0]
    # Users beyond the training id range have no row in the matrix.
    if user >= uNum:
        continue
    for vid in user_rows.vid.unique():
        # Valid columns are 0..vNum-1, so vid == vNum is out of range too;
        # venues beyond the training id range are dropped.
        if vid >= vNum:
            continue
        sp_test[user, vid] = 1
    

In [73]:
# Training configuration.
latent_dim = 10
num_epochs = 100

model = build_model(uNum, vNum, latent_dim)

# Print the model structure
# print(model.summary())

for epoch in range(num_epochs):

    # Timestamped header so the duration of each epoch can be read off.
    print(datetime.datetime.now())
    print('Epoch %s' % epoch)

    # Negatives are re-drawn on every pass over the training entries.
    uid, pid, nid = get_triplets(sp_train)

    # Keys must match the Input layer names declared in build_model.
    X = dict(user_input=uid,
             positive_item_input=pid,
             negative_item_input=nid)

    # identity_loss multiplies the targets by zero, so any array of the
    # right length works here.
    dummy_targets = np.ones(len(uid))

    model.fit(X, dummy_targets,
              batch_size=64, nb_epoch=1, verbose=0, shuffle=True)

# Evaluation runs once, after the final epoch.
print('AUC %s' % metrics.full_auc(model, sp_test))
print(datetime.datetime.now())





2017-03-12 23:30:03.031833
Epoch 0


KeyboardInterrupt: 

In [19]:
# Build a model with the same dimensions as the training run, purely to
# inspect its structure. Sizes come from the loaded data and the training
# configuration instead of repeating them as literals.
bpr = build_model(uNum, vNum, latent_dim)
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line.
bpr.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
positive_item_input (InputLayer) (None, 1)             0                                            
____________________________________________________________________________________________________
negative_item_input (InputLayer) (None, 1)             0                                            
____________________________________________________________________________________________________
user_input (InputLayer)          (None, 1)             0                                            
____________________________________________________________________________________________________
item_embedding (Embedding)       (None, 1, 10)         574030      positive_item_input[0][0]        
                                                                   negative_item_input[0][0

In [25]:
# Draw one batch of triplets and show the user, positive-item and
# negative-item index arrays, in that order.
uid, pid, nid = get_triplets(sp_train)
for index_array in (uid, pid, nid):
    print(index_array)

[    0     0     0 ..., 34795 34795 34795]
[    1     2     6 ..., 46200 50113 54517]
[47140 19691 51320 ..., 40088 43783 12070]


# 