In [1]:
from typing import Dict, Text
 
import numpy as np
import pandas as pd
import tensorflow as tf
 
import tensorflow_recommenders as tfrs 

In [3]:
# Read the data
df = pd.read_csv("../../datasets/recommenders/bookcrossings/Preprocessed_data.csv", index_col=0)
# Get take the columns we need for training
rating_data = df[["user_id","book_title", "rating"]]

In [None]:
rating_data['user_id'] = rating_data.user_id.astype('str')
rating_data['book_title'] = rating_data.book_title.astype('str')
rating_data['rating'] = rating_data.rating.astype(np.float32)

In [8]:
dataset = tf.data.Dataset.from_tensor_slices(
    (
      tf.cast(rating_data['user_id'].values.reshape(-1,1), tf.string),
      tf.cast(rating_data['book_title'].values.reshape(-1,1), tf.string),
      tf.cast(rating_data['rating'].values.reshape(-1,1),tf.float32)
    )
)

In [9]:
@tf.function
def rename(x0,x1,x2):
    y = {}
    y["user_id"] = x0
    y['book_title'] = x1
    y['rating'] = x2
    return y

dataset = dataset.map(rename)

In [10]:
books = rating_data.book_title.values
users = rating_data.user_id.values

unique_book_titles = np.unique(list(books))
unique_user_ids = np.unique(list(users))

In [11]:
class RankingModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    embedding_dimension = 32

    # Compute embeddings for users.
    self.user_embeddings = tf.keras.Sequential([
      tf.keras.layers.experimental.preprocessing.StringLookup(
        vocabulary=unique_user_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
    ])

    # Compute embeddings for books.
    self.book_embeddings = tf.keras.Sequential([
      tf.keras.layers.experimental.preprocessing.StringLookup(
        vocabulary=unique_book_titles, mask_token=None),
      tf.keras.layers.Embedding(len(unique_book_titles) + 1, embedding_dimension)
    ])

    # Compute predictions.
    self.ratings = tf.keras.Sequential([
      # Learn multiple dense layers.
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.Dense(64, activation="relu"),
      # Make rating predictions in the final layer.
      tf.keras.layers.Dense(1)
  ])

  def __call__(self, x):
    
    user_id, book_name = x
    user_embedding = self.user_embeddings(user_id)
    book_embedding = self.book_embeddings(book_name)

    return self.ratings(tf.concat([user_embedding, book_embedding], axis=1))

In [12]:
class BooksModel(tfrs.models.Model):

  def __init__(self):
    super().__init__()
    self.ranking_model: tf.keras.Model = RankingModel()
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss = tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  def compute_loss(self, features, training=False) -> tf.Tensor:
    print(features)
    rating_predictions = self.ranking_model((features['user_id'], features["book_title"]))

    # The task computes the loss and the metrics.
    return self.task(labels=features["rating"], predictions=rating_predictions)

In [None]:
from datetime import datetime

model = BooksModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))
# Cache the dataset 
cache_dataset = dataset.cache()
# Tensorboard 
logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
# Training 
model.fit(cache_dataset, epochs=5,
                    verbose=1, callbacks=[tensorboard_callback])

Epoch 1/5
{'user_id': <tf.Tensor 'IteratorGetNext:2' shape=(1,) dtype=string>, 'book_title': <tf.Tensor 'IteratorGetNext:0' shape=(1,) dtype=string>, 'rating': <tf.Tensor 'IteratorGetNext:1' shape=(1,) dtype=float32>}
{'user_id': <tf.Tensor 'IteratorGetNext:2' shape=(1,) dtype=string>, 'book_title': <tf.Tensor 'IteratorGetNext:0' shape=(1,) dtype=string>, 'rating': <tf.Tensor 'IteratorGetNext:1' shape=(1,) dtype=float32>}