In [4]:
import tensorflow as tf
from tensorflow.keras import layers
print("tensorflow version:",tf.__version__)

class GMF(layers.Layer):
    """Generalized Matrix Factorization scoring layer.

    Each user index and each item index is looked up in its own embedding
    table; the two embeddings are multiplied element-wise and the product is
    passed through a single-unit dense layer with a sigmoid activation.
    """

    def __init__(self, num_users, num_items, hidden_dim):
        """Build the two embedding tables and the output layer.

        Args:
            num_users: ``input_dim`` of the user embedding table.
            num_items: ``input_dim`` of the item embedding table.
            hidden_dim: ``output_dim`` shared by both embedding tables.
        """
        super().__init__()
        # Sub-layers are created in a fixed order: user table, item table, head.
        self.gmf_user_emb = layers.Embedding(input_dim=num_users, output_dim=hidden_dim)
        self.gmf_item_emb = layers.Embedding(input_dim=num_items, output_dim=hidden_dim)
        self.final = layers.Dense(units=1, activation='sigmoid')

    def call(self, user_indices, item_indices):
        """Score user/item pairs.

        Args:
            user_indices: Indices fed to the user embedding table.
            item_indices: Indices fed to the item embedding table; the two
                embedding results are combined with ``tf.multiply``.

        Returns:
            The sigmoid output of the final dense layer (last dimension 1).
        """
        user_vec = self.gmf_user_emb(user_indices)
        item_vec = self.gmf_item_emb(item_indices)
        return self.final(tf.multiply(user_vec, item_vec))

tensorflow version: 2.0.0-beta1


In [5]:
import pandas as pd
from utils.data import SampleGenerator

def reindex_ids(frame, column):
    """Return a copy of ``frame`` with ``column`` re-coded as contiguous ints.

    The distinct values of ``column`` are mapped to ``0 .. n_unique - 1`` in
    ascending order of the original value, so the mapping does not depend on
    ``set`` iteration order. The re-coded column is placed last in the result.

    Args:
        frame: DataFrame containing ``column``.
        column: Name of the id column to re-code.

    Returns:
        A new DataFrame; ``frame`` itself is left untouched.
    """
    codes, _ = pd.factorize(frame[column], sort=True)
    # Drop-then-assign appends the re-coded column at the end, which is where
    # the earlier merge/drop/rename sequence left it.
    return frame.drop(columns=column).assign(**{column: codes.astype('int64')})


# Load the raw ratings and use the generic column name `itemId` for movies.
ratings_raw = pd.read_csv('dataset/ratings.csv').rename(columns={'movieId': 'itemId'})

# Embedding tables are indexed from 0, so both id columns are compacted to
# contiguous ranges before they are handed to the sample generator.
ratings = reindex_ids(reindex_ids(ratings_raw, 'userId'), 'itemId')

data = SampleGenerator(ratings, implicit=True)

# Hyper-parameters.
hidden_dim = 128           # embedding width
lr = 0.001                 # Adam learning rate
batch_size = 2048
epochs = 15
num_negatives_train = 5    # passed to data.instance_a_train_loader
num_negatives_test = 500   # passed to data.instance_test_loader

# Table sizes as reported by the sample generator.
num_users = data.num_users
num_items = data.num_items

model = GMF(num_users, num_items, hidden_dim)

# Logistic loss; the model already applies a sigmoid, so the default
# (probability-input) form of BinaryCrossentropy is used.
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

In [6]:
import os
from utils.eval import Evaluation

#TODO: model save code
#if not os.path.exists("./checkpoint"):
#    os.mkdir("./checkpoint")


def collect_predictions(model, loader):
    """Score every batch of ``loader`` and flatten the results into lists.

    Args:
        model: Callable invoked as ``model(user, item)``.
        loader: Iterable of batches; element 0 of a batch holds the users and
            element 1 the items, both exposing ``.numpy()``.

    Returns:
        A ``(users, items, preds)`` tuple of plain Python lists, aligned by
        position, with the predictions reshaped to one dimension.
    """
    users, items, preds = [], [], []
    for batch in loader:
        user, item = batch[0], batch[1]
        users.extend(user.numpy().tolist())
        items.extend(item.numpy().tolist())
        scores = tf.reshape(model(user, item), [-1])
        preds.extend(scores.numpy().tolist())
    return users, items, preds


# The evaluation loaders are built once and iterated again after every epoch.
test_loader, negative_loader = data.instance_test_loader(
    num_negatives=num_negatives_test, batch_size=batch_size)

for epoch in range(1, epochs + 1):
    # A fresh training loader is requested at the start of every epoch.
    train_loader = data.instance_a_train_loader(
        num_negatives=num_negatives_train, batch_size=batch_size)

    total_loss = 0.0
    for batch in train_loader:
        user, item, rating = batch[0], batch[1], batch[2]
        # Record the forward pass so gradients can be taken afterwards.
        with tf.GradientTape() as tape:
            pred = model(user, item)
            # The model output is flattened to 1-D before computing the loss.
            loss = loss_fn(rating, tf.reshape(pred, [-1]))
        # Gradients of the batch loss w.r.t. every trainable weight of the model.
        gradients = tape.gradient(loss, model.trainable_weights)
        # One optimizer step on the embeddings and the output layer.
        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
        total_loss += float(loss)

    # The reported loss is the sum of the per-batch losses for the epoch.
    print("epoch{0} loss:{1:.4f}".format(epoch, total_loss))

    #TODO: model save code

    test_users, test_items, test_preds = collect_predictions(model, test_loader)
    neg_users, neg_items, neg_preds = collect_predictions(model, negative_loader)

    # Named `evaluator` so the builtin `eval` is not shadowed in the kernel.
    evaluator = Evaluation([test_users, test_items, test_preds,
                            neg_users, neg_items, neg_preds])
    evaluator.print_eval_score_k(10)
        

W0615 13:36:44.396527   212 deprecation.py:323] From C:\Users\hojin\Anaconda3\envs\tensorflow2\lib\site-packages\tensorflow\python\ops\math_grad.py:1220: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


epoch1 loss:136.5574
recall@10:0.5528, prec@10:0.2267
epoch2 loss:91.1877
recall@10:0.5807, prec@10:0.2491
epoch3 loss:63.4059
recall@10:0.5870, prec@10:0.2505
epoch4 loss:58.2001
recall@10:0.6185, prec@10:0.2671
epoch5 loss:50.9383
recall@10:0.6461, prec@10:0.2849
epoch6 loss:43.5510
recall@10:0.6597, prec@10:0.2940
epoch7 loss:37.4996
recall@10:0.6684, prec@10:0.3055
epoch8 loss:32.4520
recall@10:0.6751, prec@10:0.3082
epoch9 loss:28.3746
recall@10:0.6765, prec@10:0.3114
epoch10 loss:24.9596
recall@10:0.6775, prec@10:0.3132
epoch11 loss:21.9771
recall@10:0.6795, prec@10:0.3157
epoch12 loss:19.3032
recall@10:0.6821, prec@10:0.3196
epoch13 loss:17.2191
recall@10:0.6869, prec@10:0.3210
epoch14 loss:15.3767
recall@10:0.6846, prec@10:0.3214
epoch15 loss:13.6298
recall@10:0.6786, prec@10:0.3193
