<a href="https://colab.research.google.com/github/dtuleva/Recipe_Recommendation_System/blob/main/tfrs_quickstart_recreate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mlflow --quiet



[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.1/19.1 MB[0m [31m41.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.3/150.3 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m148.1/148.1 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.2/80.2 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m591.1 kB/s[0m eta [36m0:00:00[0m
[?25h

In [151]:
# zip mlflow logs
# !zip -r /content/test_experiment.zip /content/mlruns

# Recreate tfrs quickstart tutorial



In [1]:
import mlflow
import pandas as pd

In [2]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

In [33]:
mlflow.create_experiment("tfrs_quickstart")

MlflowException: Invalid experiment ID: '.ipynb_checkpoints'

##### Recreate dataset structure with recipe data

In [35]:
ratings_data = pd.read_csv("/content/mock-data_interaction.csv")
recipes_data = pd.read_csv("/content/mock-data_recipe.csv")

In [5]:
ratings = tf.data.Dataset.from_tensor_slices(dict(ratings_data)) # dict for columns of  different dtypes

In [6]:
recipes = tf.data.Dataset.from_tensor_slices(recipes_data.recipe_id)

In [7]:
# feature selection
ratings = ratings.map(lambda x: {
    "recipe_id": x["recipe_id"],
    "user_id": x["user_id"]
})

In [8]:
for r in ratings.take(1):
  print(type(r))
  for k, v in r.items():
    print(f"{k}: {v}")

<class 'dict'>
recipe_id: 222388
user_id: 8542392


In [9]:
for r in recipes.take(1):
  print(r)

tf.Tensor(222388, shape=(), dtype=int64)


Build vocabularies to convert user ids and movie titles into integer indices for embedding layers:

In [12]:
user_ids_vocabulary = tf.keras.layers.IntegerLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x: x["user_id"]))

recipe_id_vocabulary = tf.keras.layers.IntegerLookup(mask_token=None)
recipe_id_vocabulary.adapt(recipes)

In [13]:
user_ids_vocabulary(8542392)

<tf.Tensor: shape=(), dtype=int64, numpy=2416>

In [14]:
recipe_id_vocabulary(222388)

<tf.Tensor: shape=(), dtype=int64, numpy=32>

## Define a model
We can define a TFRS model by inheriting from tfrs.Model and implementing the compute_loss method:

In [15]:
class RecipeModel(tfrs.Model):
  # We derive from a custom base class to help reduce boilerplate. Under the hood,
  # these are still plain Keras Models.

  def __init__(
      self,
      user_model: tf.keras.Model,
      recipe_model: tf.keras.Model,
      task: tfrs.tasks.Retrieval):
    super().__init__()

    # Set up user and movie representations.
    self.user_model = user_model
    self.recipe_model = recipe_model

    # Set up a retrieval task.
    self.task = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    # Define how the loss is computed.

    user_embeddings = self.user_model(features["user_id"])
    recipe_embeddings = self.recipe_model(features["recipe_id"])

    return self.task(user_embeddings, recipe_embeddings)

### Define the two models and the retrieval task.

In [18]:
# Define user and movie models.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocabulary_size(), 64)
])
recipe_model = tf.keras.Sequential([
    recipe_id_vocabulary,
    tf.keras.layers.Embedding(recipe_id_vocabulary.vocabulary_size(), 64)
])

# Define your objectives.
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    recipes.batch(128).map(recipe_model)
  )
)


### Fit and evaluate it.

Create the model, train it, and generate predictions:

In [41]:
test_user_idx = 2416

In [None]:
# Create a retrieval model.
model = RecipeModel(user_model, recipe_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

In [26]:
with mlflow.start_run() as run:
  # Train for 3 epochs.
  model.fit(ratings.batch(4096), epochs=3)
  mlflow.log_param("dataset", "mock_100")


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [24]:




# Use brute-force search to set up retrieval using the trained representations.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
index.index_from_dataset(
    recipes.batch(100).map(lambda recipe_id: (recipe_id, model.recipe_model(recipe_id))))

# Get some recommendations.
_, titles = index(np.array([2416]))
print(f"Top 3 recommendations for user 2416: {titles[0, :3]}")

Epoch 1/3
Epoch 2/3
Epoch 3/3
Top 3 recommendations for user 2416: [218939 233443  23187]


In [37]:
# Get some recommendations.
_, titles = index(np.array([test_user_idx]))
print(f"Top 3 recommendations for user {test_user_idx}: {titles[0, :3]}")

Top 3 recommendations for user 2416: [218939 233443  23187]


In [42]:
ratings_data[ratings_data.user_id == 8542392]

Unnamed: 0.1,Unnamed: 0,user_id,recipe_id,rating,dateLastModified
0,0,8542392,222388,5,2017-04-22T12:46:43.663\n
