In [1]:
from __future__ import absolute_import, division, print_function
import pandas as pd

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)

from tensorflow.keras.optimizers import Adam, SGD

1.13.1


In [2]:
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']

ratings_base = pd.read_csv(r'..\data\ml-100k\ub.base', sep='\t', names=r_cols, encoding='latin-1')
ratings_test = pd.read_csv(r'..\data\ml-100k\ub.test', sep='\t', names=r_cols, encoding='latin-1')

rate_train = ratings_base.values
rate_test = ratings_test.values

# indices start from 0
rate_train[:, :2] -= 1
rate_test[:, :2] -= 1

In [3]:
users = np.unique(rate_train[:, 0])
num_users = np.max(users) + 1
num_users

943

In [4]:
items = np.unique(rate_train[:, 1])
num_items = np.max(items) + 1
num_items

1682

In [124]:
def construct_model(params):
  # type: (tf.Tensor, tf.Tensor, dict) -> tf.Tensor
  """Initialize NeuMF model.

  Args:
    users: Tensor of user ids.
    items: Tensor of item ids.
    params: Dict of hyperparameters.

  Raises:
    ValueError: if the first model layer is not even.
  """

  num_users = params["num_users"]
  num_items = params["num_items"]
    
  model_layers = params["model_layers"]
    
  mf_regularization = params["mf_regularization"]
  mlp_reg_layers = params["mlp_reg_layers"]

  mf_dim = params["mf_dim"]

  if model_layers[0] % 2 != 0:
    raise ValueError("The first layer size should be multiple of 2!")

  # Input variables
  user_input = tf.keras.layers.Input(shape=(1,))
  item_input = tf.keras.layers.Input(shape=(1,))

  # Initializer for embedding layers
  embedding_initializer = "glorot_uniform"

  # Embedding layers of GMF and MLP
  mf_embedding_user = tf.keras.layers.Embedding(
      num_users,
      mf_dim,
      embeddings_initializer=embedding_initializer,
      embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
      input_length=1)
  mf_embedding_item = tf.keras.layers.Embedding(
      num_items,
      mf_dim,
      embeddings_initializer=embedding_initializer,
      embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
      input_length=1)
  mlp_embedding_user = tf.keras.layers.Embedding(
      num_users,
      model_layers[0]//2,
      embeddings_initializer=embedding_initializer,
      embeddings_regularizer=tf.keras.regularizers.l2(mlp_reg_layers[0]),
      input_length=1)
  mlp_embedding_item = tf.keras.layers.Embedding(
      num_items,
      model_layers[0]//2,
      embeddings_initializer=embedding_initializer,
      embeddings_regularizer=tf.keras.regularizers.l2(mlp_reg_layers[0]),
      input_length=1)

  # GMF part
  mf_user_latent = mf_embedding_user(user_input)
  mf_item_latent = mf_embedding_item(item_input)
  # Element-wise multiply
  mf_vector = tf.keras.layers.multiply([mf_user_latent, mf_item_latent])

  # MLP part
  mlp_user_latent = mlp_embedding_user(user_input)
  mlp_item_latent = mlp_embedding_item(item_input)
  # Concatenation of two latent features
  mlp_vector = tf.keras.layers.concatenate([mlp_user_latent, mlp_item_latent])

  num_layer = len(model_layers)  # Number of layers in the MLP
  for layer in range(1, num_layer):
    model_layer = tf.keras.layers.Dense(
        model_layers[layer],
        kernel_regularizer=tf.keras.regularizers.l2(mlp_reg_layers[layer]),
        activation="relu")
    mlp_vector = model_layer(mlp_vector)

  # Concatenate GMF and MLP parts
  predict_vector = tf.keras.layers.concatenate([mf_vector, mlp_vector])

  # Final prediction layer
  logits = tf.keras.layers.Dense(
      1, activation=None, kernel_initializer="lecun_uniform",
      name='Rating')(predict_vector)

  # Print model topology.
  model = tf.keras.models.Model([user_input, item_input], logits)
  model.summary()

  return model

In [126]:
params = {
    'num_users': num_users,
    'num_items': num_items,
    'mf_dim': 20,
    'mf_regularization': 0.001,
    'model_layers': [40, 40, 40],
    'mlp_reg_layers': [0.001, 0.001, 0.001]
}
model = construct_model(params)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_37 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_38 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_40 (Embedding)        (None, 1, 20)        18860       input_37[0][0]                   
__________________________________________________________________________________________________
embedding_41 (Embedding)        (None, 1, 20)        33640       input_38[0][0]                   
__________________________________________________________________________________________________
concatenat

In [None]:
model.compile(optimizer=Adam(lr=0.00001), loss='MSE')
model.fit(x=[rate_train[:,0], rate_train[:,1]], y=rate_train[:,2], epochs=100, batch_size=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100

In [None]:
model.compile(optimizer=Adam(lr=0.000001), loss='MSE')
model.fit(x=[rate_train[:,0], rate_train[:,1]], y=rate_train[:,2], epochs=100, batch_size=1)

In [107]:
model.predict(x = [[0], [0]])

array([[[4.938753]]], dtype=float32)

In [108]:
model.predict(x = [[0], [1]])

array([[[2.9651628]]], dtype=float32)

In [61]:
rate_train

array([[        0,         0,         5, 874965758],
       [        0,         1,         3, 876893171],
       [        0,         2,         4, 878542960],
       ...,
       [      942,      1187,         3, 888640250],
       [      942,      1227,         3, 888640275],
       [      942,      1329,         3, 888692465]], dtype=int64)