# Installing modules
These lines should be run only on Colab.

In [None]:
%%capture
%%bash
# Colab-only setup cell: installs the packages imported later in this notebook.
# %%capture silences pip's output; remove it temporarily to debug a failed install.
pip install --upgrade tensorflow
# NOTE(review): scann is presumably the backend required by the TFRS ScaNN layer — confirm.
pip install scann
pip install -q tensorflow-recommenders
pip install -q tensorflow_hub
# NOTE(review): no versions are pinned, so these installs may resolve to
# mutually incompatible releases over time — consider pinning.
# These lines should be run only on Colab.

# Importing modules
Import of:


*   TensorFlow
*   TensorFlow Recommenders: contains the ScaNN layer we are going to use
*   TensorFlow Hub: contains the embedding model we are going to pass to the ScaNN layer
*   os and shutil: for path management and for zipping the saved model


In [None]:
import tensorflow as tf
import tensorflow_recommenders as tfrs
import tensorflow_hub as hub

import os 
import shutil

# Creating the index

The following function creates the index given a dataset of embeddings.


In [None]:
def create_index(dataset, embedding_model = None):
  """
  Build a ScaNN index over a dataset of embeddings and call it once.

  :param dataset: a tensorflow dataset with the embedded version of the space we want to query in. It is batched internally in groups of 512, so it is presumably unbatched (one embedding per element) - TODO confirm with callers
  :param embedding_model: optional tensorflow model (loaded with tf hub or through the keras load_model function) that embeds strings into multidimensional vectors. The embedder should be the same as the one used to create :param dataset:. When given, it becomes the query model of the index, so the index is queried with raw strings; when None, the index is queried directly with embedding vectors
  :return: the index model, populated from :param dataset: and already called once
  """

  if embedding_model is None:
    # No embedder: queries are embedding vectors, so the first element of the
    # dataset is used as a probe query.
    index = tfrs.layers.factorized_top_k.ScaNN()
    test_value = tf.constant(list(dataset.take(1).as_numpy_iterator()))
  else:
    # With an embedder the index receives raw strings; an empty string is
    # enough as a probe query.
    index = tfrs.layers.factorized_top_k.ScaNN(query_model = embedding_model)
    test_value = tf.constant([''])

  index.index_from_dataset(dataset.batch(512))

  # Call the index once on the probe query; presumably this is what fixes the
  # input shapes so that the model can be saved afterwards.
  _ = index(test_value)

  return index

# Creating and saving the index in SavedModel format

The following function generates the servable version of the model.

In [None]:
def create_save(dataset:tf.data.Dataset, save_path:str, version:int = 1, embedding_model:tf.keras.Model = None, zip:bool = False):
  """
  Create the index with create_index and save it under ``save_path/version``.

  :param dataset: a tensorflow dataset with the embedded version of the space we want to query in
  :param save_path: a path, as string, of the folder the model is saved in
  :param version: the model is saved in a subfolder of :param save_path: named after this number
  :param embedding_model: a tensorflow model (loaded with tf hub or through the keras load_model function) that embeds strings into multidimensional vectors. The embedder should be the same as the one used to create :param dataset:
  :param zip: a boolean, if it is true, also creates ``<save_path>.zip`` containing the content of :param save_path:
  :return: the index model
  """

  index = create_index(dataset, embedding_model) #creating the index

  path = os.path.join(save_path, str(version)) #adding the version subfolder

  index.save(filepath = path) #saving the index

  if zip:
    # root_dir must be given explicitly: without it make_archive zips the
    # current working directory instead of the saved model. normpath strips a
    # trailing separator so the archive is written next to save_path rather
    # than inside the folder being archived.
    shutil.make_archive(os.path.normpath(save_path), 'zip', root_dir = save_path)

  return index