In [4]:
import tensorflow as tf
from tensorflow.keras import layers
# Log the TensorFlow release in use so the recorded results can be tied to it.
print("tensorflow version:", tf.__version__)

class MLP(layers.Layer):
    """Multi-layer perceptron scorer over user and item embeddings.

    Looks up one embedding per user index and one per item index,
    concatenates them along the feature axis, feeds the result through a
    stack of ReLU dense layers and finishes with a single sigmoid unit.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        hidden_dim: Width of each embedding vector.
        layers_dim: Unit count(s) of the hidden dense layers; either a
            single int or an iterable of ints.
    """

    def __init__(self, num_users, num_items, hidden_dim, layers_dim):
        super().__init__()
        self.mlp_user_emb = layers.Embedding(num_users, hidden_dim)
        self.mlp_item_emb = layers.Embedding(num_items, hidden_dim)

        # Accept a bare int as shorthand for a single hidden layer.
        # isinstance (rather than an exact type check) also covers int subclasses.
        if isinstance(layers_dim, int):
            layers_dim = [layers_dim]
        self.mlp_fc_layers = [layers.Dense(units, activation='relu') for units in layers_dim]

        self.mlp_last = layers.Dense(1, activation='sigmoid')

    def call(self, user_indices, item_indices):
        """Score (user, item) index pairs.

        Args:
            user_indices: Integer tensor of user indices.
            item_indices: Integer tensor of item indices, same shape as
                `user_indices`.

        Returns:
            Tensor of sigmoid outputs with a trailing dimension of size 1.
        """
        user_vec = self.mlp_user_emb(user_indices)
        item_vec = self.mlp_item_emb(item_indices)
        # Concatenate on the last (feature) axis. For rank-1 index tensors this
        # is axis 1, exactly as before; using -1 also keeps (batch, 1)-shaped
        # index tensors from being stacked along the wrong axis.
        hidden = tf.concat([user_vec, item_vec], axis=-1)
        for dense in self.mlp_fc_layers:
            hidden = dense(hidden)
        return self.mlp_last(hidden)

tensorflow version: 2.0.0-beta1


In [5]:
import pandas as pd
from utils.data import SampleGenerator

# Load the raw ratings; the CSV names the item column 'movieId'.
ratings_raw = pd.read_csv('dataset/ratings.csv').rename(columns={'movieId': 'itemId'})

# Re-index raw user/item IDs to contiguous 0-based integer codes.
# factorize(sort=True) assigns codes in ascending order of the raw ID, so the
# mapping is deterministic and does not depend on set iteration order.
user_codes = pd.factorize(ratings_raw['userId'], sort=True)[0]
item_codes = pd.factorize(ratings_raw['itemId'], sort=True)[0]

# factorize marks missing values with -1, which would be an invalid index.
assert (user_codes >= 0).all() and (item_codes >= 0).all(), "ratings contain missing user/item ids"

# Drop the raw ID columns and append the re-indexed ones under the original
# names (same resulting column order as a drop-then-rename of merged columns).
ratings = (
    ratings_raw
    .drop(columns=['userId', 'itemId'])
    .assign(userId=user_codes, itemId=item_codes)
)

# --- Hyperparameters -------------------------------------------------------
hidden_dim = 128            # embedding width passed to MLP
layers_dim = [128]          # hidden dense layer sizes passed to MLP
lr = 0.001                  # Adam learning rate
batch_size = 2048
epochs = 15
num_negatives_train = 5     # passed as num_negatives to the train loader
num_negatives_test = 500    # passed as num_negatives to the test loader

# --- Data ------------------------------------------------------------------
data = SampleGenerator(ratings, implicit=True)
num_users, num_items = data.num_users, data.num_items

# --- Model, loss and optimizer ---------------------------------------------
model = MLP(num_users, num_items, hidden_dim, layers_dim)
# Binary cross-entropy on the model's sigmoid output.
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

In [10]:
import os
from utils.eval import Evaluation

#TODO: model save code
#if not os.path.exists("./checkpoint"):
#    os.mkdir("./checkpoint")


def _collect_predictions(scorer, loader):
    """Score every batch of `loader` with `scorer` and flatten the results.

    Args:
        scorer: Callable taking (user, item) tensors and returning predictions.
        loader: Iterable of batches whose first two elements are the user and
            item tensors.

    Returns:
        Tuple of three flat Python lists: (users, items, predictions).
    """
    users, items, preds = [], [], []
    for batch in loader:
        user, item = batch[0], batch[1]
        users.extend(user.numpy().tolist())
        items.extend(item.numpy().tolist())
        # Flatten predictions so there is one score per (user, item) pair.
        scores = tf.reshape(scorer(user, item), [-1])
        preds.extend(scores.numpy().tolist())
    return users, items, preds


# The test loaders are built once and iterated again every epoch.
# NOTE(review): assumes both loaders can be iterated repeatedly - confirm.
test_loader, negative_loader = data.instance_test_loader(num_negatives=num_negatives_test, batch_size=batch_size)
for epoch in range(1, epochs + 1):
    # A fresh training loader is requested for every epoch.
    train_loader = data.instance_a_train_loader(num_negatives=num_negatives_train, batch_size=batch_size)
    # Running sum (not mean) of the per-batch loss values for this epoch.
    total_loss = 0
    for batch in train_loader:
        user, item, rating = batch[0], batch[1], batch[2]
        # Record the forward pass so gradients can be taken afterwards.
        with tf.GradientTape() as tape:
            pred = model(user, item)
            # Loss value for this batch; predictions are flattened to 1-D.
            loss = loss_fn(rating, tf.reshape(pred, [-1]))
        # Gradients of the loss with respect to the model's trainable weights.
        gradients = tape.gradient(loss, model.trainable_weights)
        # Apply one optimizer step to the model's trainable weights.
        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
        total_loss += float(loss)

    print("epoch{0} loss:{1:.4f}".format(epoch, total_loss))

    #TODO: model save code

    # Score the held-out test pairs and their sampled negatives.
    test_users, test_items, test_preds = _collect_predictions(model, test_loader)
    neg_users, neg_items, neg_preds = _collect_predictions(model, negative_loader)

    # Named `evaluator` so the builtin `eval` is not shadowed in the kernel.
    evaluator = Evaluation([test_users, test_items, test_preds,
                            neg_users, neg_items, neg_preds])
    evaluator.print_eval_score_k(10)
        

W0619 00:12:25.427698  2720 deprecation.py:323] From C:\Users\hojin\Anaconda3\envs\tensorflow2\lib\site-packages\tensorflow\python\ops\math_grad.py:1220: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


epoch1 loss:76.9130
recall@10:0.5908, prec@10:0.2512
epoch2 loss:62.7367
recall@10:0.6031, prec@10:0.2583
epoch3 loss:61.8255
recall@10:0.6030, prec@10:0.2580
epoch4 loss:61.1446
recall@10:0.6078, prec@10:0.2622
epoch5 loss:60.5392
recall@10:0.6219, prec@10:0.2691
epoch6 loss:59.3658
recall@10:0.6359, prec@10:0.2795
epoch7 loss:58.0553
recall@10:0.6355, prec@10:0.2781
epoch8 loss:55.7552
recall@10:0.6523, prec@10:0.2924
epoch9 loss:52.2838
recall@10:0.6697, prec@10:0.3022
epoch10 loss:48.5580
recall@10:0.6702, prec@10:0.3044
epoch11 loss:45.6952
recall@10:0.6801, prec@10:0.3073
epoch12 loss:43.4289
recall@10:0.6753, prec@10:0.3096
epoch13 loss:41.7727
recall@10:0.6763, prec@10:0.3081
epoch14 loss:40.0549
recall@10:0.6732, prec@10:0.3091
epoch15 loss:38.6529
recall@10:0.6744, prec@10:0.3078
