In [None]:
# Reference: "Introducing TensorFlow Recommenders" (TensorFlow blog)
# https://blog.tensorflow.org/2020/09/introducing-tensorflow-recommenders.html

In [1]:
import pandas as pd

from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_recommenders as tfrs
import tensorflow_datasets as tfds

2023-02-23 16:12:59.424467: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-23 16:13:00.044686: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:
2023-02-23 16:13:00.044749: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pickled ratings table and keep only the three columns used below.
rating_columns = ["user_id", "name", "user_rating"]
df = pd.read_pickle("../data/data.pkl")[rating_columns]

In [3]:
# The hand-written test user gets an id one past the largest id in the data.
test_id = df["user_id"].max() + 1
print(test_id)

# Titles rated by the test user, mapped to the score given.
test_ratings = {
    "Angel Beats!": 10,
    "Ookami to Koushinryou": 10,
    "Shinsekai yori": 8,
    "Seikimatsu Occult Gakuin": 6,
    "Futari wa Precure": 8,
    "Shugo Chara!": 8,
    "Uchuu Senkan Yamato 2199": 9,
}

# One row per rated title, laid out with the same columns as `df`.
test_df = pd.DataFrame(
    [[test_id, title, score] for title, score in test_ratings.items()],
    columns=df.columns,
)

73517


In [4]:
# Down-sample the ratings (memory limitation), append the test user's rows,
# and store user ids as strings.
sampled_ratings = df.sample(10000, random_state=42)
sample_data = pd.concat([sampled_ratings, test_df], axis=0).assign(
    user_id=lambda frame: frame["user_id"].astype(str)
)

In [5]:
# Preview the first rows of the sampled ratings (index labels come from the original table).
sample_data.head()

Unnamed: 0,user_id,name,user_rating
3669635,562,Ranpo Kitan: Game of Laplace,6
2412642,42684,Death Note Rewrite,7
3014556,53767,Kuroshitsuji II,7
732241,27381,DearS,7
1937180,57309,Yuusha ni Narenakatta Ore wa Shibushibu Shuush...,5


In [6]:
# Interaction table: one (title, user) pair per row, renumbered from 0.
ratings = sample_data.loc[:, ["name", "user_id"]].reset_index(drop=True)

In [7]:
# Candidate table: each distinct title once, renumbered from 0.
animes = sample_data[["name"]].drop_duplicates(ignore_index=True)

In [8]:
# Display the interaction table (pandas truncates the middle rows of a long frame).
ratings

Unnamed: 0,name,user_id
0,Ranpo Kitan: Game of Laplace,562
1,Death Note Rewrite,42684
2,Kuroshitsuji II,53767
3,DearS,27381
4,Yuusha ni Narenakatta Ore wa Shibushibu Shuush...,57309
...,...,...
10002,Shinsekai yori,73517
10003,Seikimatsu Occult Gakuin,73517
10004,Futari wa Precure,73517
10005,Shugo Chara!,73517


In [9]:
# Display the de-duplicated title table.
animes

Unnamed: 0,name
0,Ranpo Kitan: Game of Laplace
1,Death Note Rewrite
2,Kuroshitsuji II
3,DearS
4,Yuusha ni Narenakatta Ore wa Shibushibu Shuush...
...,...
2196,Hyakujitsu no Bara
2197,Mitsudomoe Zouryouchuu!
2198,Taifuu no Noruda
2199,Gochuumon wa Usagi Desu ka??


In [10]:
# Build the tf.data pipelines straight from `sample_data` instead of from the
# `ratings` / `animes` DataFrames. This cell rebinds those two names to
# Datasets, so reading them here would make the cell fail when it is run a
# second time (a Dataset has no `.to_dict`).
interactions_frame = sample_data[["name", "user_id"]].reset_index(drop=True)
titles_frame = sample_data[["name"]].drop_duplicates().reset_index(drop=True)

# Each dataset element is a dict keyed by column name.
ratings = tf.data.Dataset.from_tensor_slices(
    interactions_frame.to_dict(orient="list")
)
animes = tf.data.Dataset.from_tensor_slices(titles_frame.to_dict(orient="list"))

2023-02-23 16:13:02.280242: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-23 16:13:02.296174: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:
2023-02-23 16:13:02.296199: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1934] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2023-02-23 16:13:02.296621: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow 

In [11]:
def _select_rating_features(row):
    """Keep the title and the user id of one interaction element."""
    return {"name": row["name"], "user_id": row["user_id"]}


def _select_title(row):
    """Reduce a candidate element to its bare title."""
    return row["name"]


ratings = ratings.map(_select_rating_features)

animes = animes.map(_select_title)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [12]:
# Learn the string -> integer index mapping for user ids from the interactions.
user_id_stream = ratings.map(lambda row: row["user_id"])
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(user_id_stream)

In [13]:
# Learn the string -> integer index mapping for anime titles from the candidates.
anime_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
anime_titles_vocabulary.adapt(data=animes)

In [14]:
class AnimeModel(tfrs.Model):
    """Retrieval model built from a user model, an anime model and a task.

    Subclassing ``tfrs.Model`` keeps the boilerplate down; only
    ``compute_loss`` has to be supplied here.
    """

    def __init__(
        self,
        user_model: tf.keras.Model,
        anime_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval,
    ):
        """Store the two representation models and the retrieval task.

        Args:
            user_model: Model applied to ``features["user_id"]``.
            anime_model: Model applied to ``features["name"]``.
            task: Callable that receives the user and anime embeddings and
                produces the loss.
        """
        super().__init__()

        # Representation models, one per side of the interaction.
        self.user_model = user_model
        self.anime_model = anime_model

        # Objective that turns the two embeddings into a loss.
        self.task = task

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        """Return the task loss for one batch of interactions.

        Args:
            features: Mapping that contains at least the ``"user_id"`` and
                ``"name"`` entries.
            training: Accepted for interface compatibility; not used here.

        Returns:
            The value produced by ``self.task`` for the embedded batch.
        """
        return self.task(
            self.user_model(features["user_id"]),
            self.anime_model(features["name"]),
        )

In [15]:
# User and anime models: each maps a raw string to a vocabulary index and
# then to a learned embedding vector. Both share the same embedding width.
embedding_dim = 64

user_embedding = tf.keras.layers.Embedding(
    user_ids_vocabulary.vocabulary_size(), embedding_dim
)
user_model = tf.keras.Sequential([user_ids_vocabulary, user_embedding])

anime_embedding = tf.keras.layers.Embedding(
    anime_titles_vocabulary.vocabulary_size(), embedding_dim
)
anime_model = tf.keras.Sequential([anime_titles_vocabulary, anime_embedding])

In [16]:
# Objective: a retrieval task whose top-K metric is computed against the
# embeddings of every candidate title, produced in batches of 128.
candidate_embeddings = animes.batch(128).map(anime_model)
top_k_metric = tfrs.metrics.FactorizedTopK(candidate_embeddings)
task = tfrs.tasks.Retrieval(metrics=top_k_metric)

In [17]:
# Assemble the retrieval model and compile it with Adagrad (learning rate 0.5).
optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.5)
model = AnimeModel(user_model, anime_model, task)
model.compile(optimizer=optimizer)

In [18]:
# Train for 10 epochs on batches of 1024 interactions.
# The call stays the last expression so the cell displays the History object.
model.fit(ratings.batch(1024), epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f50c82e4bb0>

In [19]:
# Brute-force retrieval over the trained representations: queries go through
# the user model, and the index returns the 100 best candidates per query.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model, k=100)

# Pair every title with its embedding, 100 titles at a time, so the index can
# hand back titles as identifiers.
title_embedding_pairs = animes.batch(100).map(
    lambda title: (title, model.anime_model(title))
)
index.index_from_dataset(title_embedding_pairs)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x7f50c80be940>

In [20]:
# Get some recommendations for the hand-written test user.
# The id is taken from `test_id` instead of a hard-coded literal, which would
# go stale as soon as the underlying data changes. It is stringified because
# the `user_id` column was cast to str before the vocabulary was built.
_scores, titles = index(np.array([str(test_id)]))

In [21]:
# Flag the anime the test user has already rated.
# `titles` holds a single row (one query), so transposing gives one title per
# row, in the order the index returned them (presumably best match first).
pred_df = pd.DataFrame(titles).T
pred_df.columns = ["name"]

# The titles arrive as byte strings. `Series.str.decode` replaces
# `DataFrame.applymap`, which is deprecated since pandas 2.1.
pred_df["name"] = pred_df["name"].str.decode("utf-8")

# A left merge keeps the predictions in their returned order. An outer merge
# sorts the keys lexicographically on pandas >= 2.2, which would scramble the
# ranking. Rows found only in `test_df` are not needed: already-rated titles
# are recognised downstream by a non-null `user_id`.
pred_df = pd.merge(pred_df, test_df, on="name", how="left")

In [22]:
# First ten recommended titles that have no rating from the test user.
not_yet_rated = pred_df["user_id"].isna()
pred_df.loc[not_yet_rated, "name"].head(10)

7      Aa! Megami-sama!: Tatakau Tsubasa
8                          Utawarerumono
9     Tales of Zestiria: Doushi no Yoake
10                  Clannad: After Story
11                         Inu x Boku SS
12                          Sacred Seven
13                    Witch Hunter Robin
14               Kyou, Koi wo Hajimemasu
15                              Iron Man
16            Mahou Shoujo Sonico★Magica
Name: name, dtype: object