<a href="https://colab.research.google.com/github/username1437/username1437/blob/main/TensorFlow(Colab)ML-Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q tensorflow-ranking

In [None]:
import tensorflow_datasets as tfds

In [None]:
# Load the "train" split of the MovieLens 100k ratings and movies datasets.
# The spelling of `fetures_data` is kept as-is because a later cell reads
# that exact name.
_TRAIN_SPLIT = "train"
ratings_data = tfds.load(name='movielens/100k-ratings', split=_TRAIN_SPLIT)
fetures_data = tfds.load(name='movielens/100k-movies', split=_TRAIN_SPLIT)

In [None]:
# Narrow every rating record down to the three fields that are kept:
# the movie title, the user id and the rating given.
ratings_data = ratings_data.map(
    lambda record: {
        field: record[field]
        for field in ("movie_title", "user_id", "user_rating")
    })

In [None]:
# Print the dataset object; the output below shows its element_spec
# (field names, shapes and dtypes).
print(ratings_data)

<MapDataset element_spec={'movie_title': TensorSpec(shape=(), dtype=tf.string, name=None), 'user_id': TensorSpec(shape=(), dtype=tf.string, name=None), 'user_rating': TensorSpec(shape=(), dtype=tf.float32, name=None)}>


In [None]:
import tensorflow as tf
from tensorflow.keras import layers 
# Streams of raw strings used to build the vocabularies: movie titles come
# from the movies dataset, user ids from the ratings dataset.
feature_data = fetures_data.map(lambda x: x["movie_title"])
users = ratings_data.map(lambda x: x["user_id"])

# `layers.StringLookup` is the stable location of this layer; the older
# `layers.experimental.preprocessing` alias is deprecated and is not
# available in newer TensorFlow/Keras releases.
# mask_token=None: no vocabulary entry is reserved for a mask value.
user_ids_vocabulary = layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(users.batch(1000))

movie_titles_vocabulary = layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(feature_data.batch(1000))

In [None]:
# Used both as the window size and as the batch size inside each window.
_LIST_SIZE = 100


def key_func(x):
  """Return the vocabulary index of the record's user id (the grouping key)."""
  return user_ids_vocabulary(x["user_id"])


def reduce_func(key, dataset):
  """Collapse one window of same-key records into batches of `_LIST_SIZE`."""
  return dataset.batch(_LIST_SIZE)


# Group the ratings by user so that each element becomes one list of ratings
# made by a single user.
train = ratings_data.group_by_window(
    key_func=key_func, reduce_func=reduce_func, window_size=_LIST_SIZE)

In [None]:
# Show the dataset's element_spec, then peek at the first grouped element:
# for every field print its shape and its first five values.
print(train)
for first_list in train.take(1):
  for key, value in first_list.items():
    print(
        f"Shape of {key}: {value.shape}",
        f"Example values of {key}: {value[:5].numpy()}",
        "",
        sep="\n",
    )

<_GroupByWindowDataset element_spec={'movie_title': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'user_id': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'user_rating': TensorSpec(shape=(None,), dtype=tf.float32, name=None)}>
Shape of movie_title: (100,)
Example values of movie_title: [b'Man Who Would Be King, The (1975)' b'Silence of the Lambs, The (1991)'
 b'Next Karate Kid, The (1994)' b'2001: A Space Odyssey (1968)'
 b'Usual Suspects, The (1995)']

Shape of user_id: (100,)
Example values of user_id: [b'405' b'405' b'405' b'405' b'405']

Shape of user_rating: (100,)
Example values of user_rating: [1. 4. 1. 5. 5.]



In [None]:
from typing import Dict, Tuple
def _features_and_labels(
    x: Dict[str, tf.Tensor]) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]:
  labels = x.pop("user_rating")
  return x, labels


# Convert every element into a (features, labels) pair, then gather 32
# elements at a time with dense_to_ragged_batch.
train = (
    train
    .map(_features_and_labels)
    .apply(tf.data.experimental.dense_to_ragged_batch(batch_size=32))
)

In [None]:
from tensorflow.keras import Model
class RankingModel(Model):
  """Scores (user, movie) pairs by the dot product of their embeddings."""

  def __init__(self, user_vocab, movie_vocab, embedding_dim: int = 64):
    """Create the lookup and embedding layers.

    Args:
      user_vocab: Callable lookup layer that maps user ids to indices and
        exposes `vocabulary_size()`.
      movie_vocab: Callable lookup layer that maps movie titles to indices
        and exposes `vocabulary_size()`.
      embedding_dim: Width of both embedding tables. Defaults to 64, the
        value that was previously hard-coded.
    """
    super().__init__()

    # Set up user and movie vocabulary and embedding.
    self.user_vocab = user_vocab
    self.movie_vocab = movie_vocab
    # Both tables share one width so the element-wise product in `call`
    # is well defined.
    self.user_embed = layers.Embedding(user_vocab.vocabulary_size(),
                                       embedding_dim)
    self.movie_embed = layers.Embedding(movie_vocab.vocabulary_size(),
                                        embedding_dim)

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    """Compute one ranking score per (user, movie) entry.

    Args:
      features: Mapping with "user_id" and "movie_title" entries.

    Returns:
      The sum over axis 2 of the element-wise product of the user and movie
      embeddings.
    """
    # Define how the ranking scores are computed:
    # Take the dot-product of the user embeddings with the movie embeddings.

    embeddings_user = self.user_embed(self.user_vocab(features["user_id"]))
    embeddings_movie = self.movie_embed(
        self.movie_vocab(features["movie_title"]))

    # NOTE: axis=2 assumes the inputs are 2-D (one row per list), so that the
    # embedded tensors are 3-D with the embedding dimension last.
    return tf.reduce_sum(embeddings_user * embeddings_movie, axis=2)

In [None]:
import tensorflow_ranking as tfr
from tensorflow.keras import optimizers
# Model built from the two vocabularies adapted earlier.
model = RankingModel(user_ids_vocabulary, movie_titles_vocabulary)

# Adagrad with a learning rate of 0.5.
optimizer = optimizers.Adagrad(learning_rate=0.5)

# Softmax ranking loss, requested in ragged mode.
loss = tfr.keras.losses.get(
    loss=tfr.keras.losses.RankingLossKey.SOFTMAX_LOSS, ragged=True)

# NDCG and MRR ranking metrics, also in ragged mode.
eval_metrics = [
    tfr.keras.metrics.get(key=metric_key, name=metric_name, ragged=True)
    for metric_key, metric_name in (
        ("ndcg", "metric/ndcg"),
        ("mrr", "metric/mrr"),
    )
]

model.compile(optimizer=optimizer, loss=loss, metrics=eval_metrics)

In [None]:
# Number of passes over the training dataset.
NUM_EPOCHS = 9

# Train the model and keep the returned History object for inspection.
history = model.fit(train, epochs=NUM_EPOCHS)

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


In [None]:
# Display the recorded per-epoch values: the loss and each compiled metric.
history.history

{'loss': [989.3775634765625,
  989.2220458984375,
  989.0970458984375,
  989.0180053710938,
  988.9356079101562,
  988.85986328125,
  988.8113403320312,
  988.7803955078125,
  988.74658203125],
 'metric/mrr': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
 'metric/ndcg': [0.9892312288284302,
  0.9903042912483215,
  0.9908571839332581,
  0.9917376041412354,
  0.9924544095993042,
  0.9931710958480835,
  0.9932291507720947,
  0.9937751293182373,
  0.9936739802360535]}

In [None]:
# Get movie title candidate list: the first batch of up to 2000 titles.
movie_titles = next(iter(feature_data.batch(2000)))

# The user to recommend for. Defined once so the model input and the printed
# message cannot drift apart.
target_user_id = "26"

# Generate the input for the target user: the user id repeated once per
# candidate title, with a leading axis of size 1 added to both tensors.
inputs = {
    "user_id":
        tf.expand_dims(
            tf.repeat(target_user_id, repeats=movie_titles.shape[0]), axis=0),
    "movie_title":
        tf.expand_dims(movie_titles, axis=0)
}

# Get movie recommendations for the target user: score every candidate, then
# sort the titles by score.
scores = model(inputs)
titles = tfr.utils.sort_by_scores(scores,
                                  [tf.expand_dims(movie_titles, axis=0)])[0]
print(f"Top 10 recommendations for user {target_user_id}: {titles[0, :10]}")

Top 10 recommendations for user 26: [b'Lawrence of Arabia (1962)' b'Titanic (1997)'
 b'Maltese Falcon, The (1941)' b'North by Northwest (1959)'
 b'Blues Brothers, The (1980)' b'Graduate, The (1967)'
 b'Great Escape, The (1963)' b'Princess Bride, The (1987)'
 b'Forrest Gump (1994)' b'Legends of the Fall (1994)']
