In [None]:
import os
import pprint
import tempfile
from typing import Dict, Text, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

In [None]:
# Load the interaction table; the relative path is resolved against the
# notebook's working directory.
data = pd.read_csv('RetrievalV2.csv')

In [None]:
# Preview the first rows of the loaded table.
data.head()

In [None]:
#data['No']=data['No'].astype('float32')

In [None]:
#data['data'] = pd.Categorical(data['data'])
#data['data'] = data.data.cat.codes

In [None]:
# (user, item) interaction pairs, one element per row of `data`. Both columns
# are cast to str and encoded as UTF-8 bytes.
ratings = tf.data.Dataset.from_tensor_slices((
    data['No'].astype(str).str.encode("utf-8"),
    data['data'].astype(str).str.encode("utf-8"),
))
# Item column on its own, one element per row. It is encoded exactly like the
# item component of `ratings` (and like the vocabulary built from this column)
# so that lookups see the same byte strings whatever the column's raw dtype is.
movies = tf.data.Dataset.from_tensor_slices(
    data['data'].astype(str).str.encode("utf-8"))

In [None]:
#tfds.as_dataframe(ratings.take(10))

TFRS: retrieval model built with TensorFlow Recommenders

In [None]:
# Split parameters, kept in one place so they are easy to adjust.
SEED = 42
SHUFFLE_BUFFER_SIZE = 2000
TRAIN_SIZE = 1600
TEST_SIZE = 400

tf.random.set_seed(SEED)

# Shuffle once with a fixed seed. `reshuffle_each_iteration=False` keeps the
# order identical on every pass, so the take/skip split below always selects
# the same examples.
shuffled = ratings.shuffle(
    SHUFFLE_BUFFER_SIZE, seed=SEED, reshuffle_each_iteration=False)

# First TRAIN_SIZE shuffled examples for training, the next TEST_SIZE for testing.
train = shuffled.take(TRAIN_SIZE)
test = shuffled.skip(TRAIN_SIZE).take(TEST_SIZE)

In [None]:
# Vocabularies: the sorted distinct values of each column, cast to str and
# encoded as UTF-8 bytes.
encoded_user_column = data['No'].astype(str).str.encode("utf-8")
encoded_item_column = data['data'].astype(str).str.encode("utf-8")
unique_user_ids = np.unique(encoded_user_column)
unique_movie_titles = np.unique(encoded_item_column)

# Batched views of the two datasets.
movie_titles = movies.batch(100)
# NOTE(review): despite the name, this batches the full (user, item) pairs of
# `ratings`, not only the user ids -- confirm this is intended.
user_ids = ratings.batch(10000)

# Display the first few item vocabulary entries.
unique_movie_titles[:10]

In [None]:
# Width of the embedding vectors produced by both the user and the item model.
embedding_dimension = 32

In [None]:
# User tower: maps a user-id byte string to an embedding vector.
user_model = tf.keras.Sequential()
# Turn each id string into an integer index using the known user vocabulary.
user_model.add(
    tf.keras.layers.experimental.preprocessing.StringLookup(
        vocabulary=unique_user_ids, mask_token=None))
# The table has one row more than the vocabulary so that ids outside the
# vocabulary (unknown tokens) also get an embedding.
user_model.add(
    tf.keras.layers.Embedding(
        input_dim=len(unique_user_ids) + 1,
        output_dim=embedding_dimension))

In [None]:
# Item tower: maps an item byte string to an embedding vector.
movie_model = tf.keras.Sequential()
# Turn each item string into an integer index using the known item vocabulary.
movie_model.add(
    tf.keras.layers.experimental.preprocessing.StringLookup(
        vocabulary=unique_movie_titles, mask_token=None))
# The table has one row more than the vocabulary size.
movie_model.add(
    tf.keras.layers.Embedding(
        input_dim=len(unique_movie_titles) + 1,
        output_dim=embedding_dimension))

In [None]:
# Top-K retrieval metrics over the candidate corpus.
# The candidates are built from `unique_movie_titles` rather than from
# `movies`: `movies` holds one element per row of `data`, so an item that
# occurs in several rows would be embedded and ranked several times, taking up
# multiple top-K slots and distorting the reported accuracies. The unique
# array is also the exact vocabulary that `movie_model` looks items up in.
metrics = tfrs.metrics.FactorizedTopK(
  candidates=tf.data.Dataset.from_tensor_slices(unique_movie_titles)
      .batch(128)
      .map(movie_model)
)

In [None]:
# Retrieval task layer configured with the FactorizedTopK `metrics` object;
# the model classes call it to obtain the training loss.
task = tfrs.tasks.Retrieval(metrics=metrics)

In [None]:
class MovielensModel(tfrs.Model):
  """Two-tower retrieval model built on the `tfrs.Model` base class.

  The class only defines how the loss is computed from a batch; the training
  and evaluation loops are inherited from `tfrs.Model`.
  """

  def __init__(self, user_model, movie_model, retrieval_task=None):
    """Stores the two towers and the task used to compute the loss.

    Args:
      user_model: Model applied to the first component of each batch.
      movie_model: Model applied to the second component of each batch.
      retrieval_task: Optional task layer called with the two embedding
        batches. When omitted, the notebook-level `task` object is used, which
        keeps the original two-argument construction working unchanged.
    """
    super().__init__()
    self.movie_model: tf.keras.Model = movie_model
    self.user_model: tf.keras.Model = user_model
    # Fall back to the global `task` only when no task was passed explicitly.
    self.task: tf.keras.layers.Layer = (
        task if retrieval_task is None else retrieval_task)

  def compute_loss(self, features: Tuple[tf.Tensor, tf.Tensor], training=False) -> tf.Tensor:
    """Computes the task loss for one batch.

    Args:
      features: Batch indexed positionally: `features[0]` is fed to the user
        model and `features[1]` to the movie model.
      training: Accepted for interface compatibility; not used here.

    Returns:
      The value returned by calling the task on the two embedding batches.
    """
    # Embed the user component of the batch.
    user_embeddings = self.user_model(features[0])
    # Embed the item component of the batch; these are the positive examples
    # paired with the users above.
    positive_movie_embeddings = self.movie_model(features[1])

    # The task computes the loss and the metrics.
    return self.task(user_embeddings, positive_movie_embeddings)

In [None]:
class NoBaseClassMovielensModel(tf.keras.Model):
  """Two-tower retrieval model with hand-written train and test steps.

  It computes the same loss as the `tfrs.Model`-based variant but implements
  `train_step` and `test_step` directly on top of `tf.keras.Model`.
  """

  def __init__(self, user_model, movie_model, retrieval_task=None):
    """Stores the two towers and the task used to compute the loss.

    Args:
      user_model: Model applied to the first component of each batch.
      movie_model: Model applied to the second component of each batch.
      retrieval_task: Optional task layer called with the two embedding
        batches. When omitted, the notebook-level `task` object is used, which
        keeps the original two-argument construction working unchanged.
    """
    super().__init__()
    self.movie_model: tf.keras.Model = movie_model
    self.user_model: tf.keras.Model = user_model
    # Fall back to the global `task` only when no task was passed explicitly.
    self.task: tf.keras.layers.Layer = (
        task if retrieval_task is None else retrieval_task)

  def _losses_for_batch(self, features: Tuple[tf.Tensor, tf.Tensor]):
    """Returns (loss, regularization_loss, total_loss) for one batch."""
    user_embeddings = self.user_model(features[0])
    positive_movie_embeddings = self.movie_model(features[1])
    loss = self.task(user_embeddings, positive_movie_embeddings)

    # Handle regularization losses as well.
    regularization_loss = sum(self.losses)

    return loss, regularization_loss, loss + regularization_loss

  def _report_metrics(self, loss, regularization_loss, total_loss) -> Dict[Text, tf.Tensor]:
    """Builds the result dict: current metric values plus the three losses."""
    results = {metric.name: metric.result() for metric in self.metrics}
    results["loss"] = loss
    results["regularization_loss"] = regularization_loss
    results["total_loss"] = total_loss
    return results

  def train_step(self, features: Tuple[tf.Tensor, tf.Tensor]) -> Dict[Text, tf.Tensor]:
    """Runs one optimization step on a batch.

    Args:
      features: Batch indexed positionally: `features[0]` is fed to the user
        model and `features[1]` to the movie model.

    Returns:
      Dict mapping each metric name to its current result, plus the keys
      "loss", "regularization_loss" and "total_loss".
    """
    # The forward pass runs under the tape so gradients can be taken.
    with tf.GradientTape() as tape:
      loss, regularization_loss, total_loss = self._losses_for_batch(features)

    # Differentiate the total loss (task + regularization) and apply the update.
    gradients = tape.gradient(total_loss, self.trainable_variables)
    self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

    return self._report_metrics(loss, regularization_loss, total_loss)

  def test_step(self, features: Tuple[tf.Tensor, tf.Tensor]) -> Dict[Text, tf.Tensor]:
    """Evaluates one batch without updating any weights.

    Args:
      features: Batch indexed positionally, as in `train_step`.

    Returns:
      Dict with the same keys as the one returned by `train_step`.
    """
    loss, regularization_loss, total_loss = self._losses_for_batch(features)
    return self._report_metrics(loss, regularization_loss, total_loss)

In [None]:
# Instantiate the tfrs.Model-based variant from the two towers, then compile
# it with an Adagrad optimizer.
model = MovielensModel(user_model=user_model, movie_model=movie_model)
adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.1)
model.compile(optimizer=adagrad)

In [None]:
# Cache the training examples first and shuffle afterwards. A cache placed
# after `shuffle` stores the order produced during the first epoch and replays
# it on every later epoch, so the shuffle would only take effect once. With the
# cache in front, the examples are still read only once but each epoch sees a
# freshly shuffled order before batching.
cached_train = train.cache().shuffle(2000).batch(256)
# The evaluation data needs no shuffling; caching the finished batches is fine.
cached_test = test.batch(128).cache()

In [None]:
# Train for 20 epochs on the batched training pipeline.
model.fit(cached_train, epochs=20)

In [None]:
# Show the (rows, columns) shape of the loaded table.
data.shape