In [36]:
from transformers import AutoTokenizer, TFAutoModel
import numpy as np
import typing
import tensorflow as tf

In [4]:
# Checkpoint name handed to `from_pretrained` by the classes defined in this notebook.
SEMANTIC_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

In [38]:
class Transformer:
    """Pair a pretrained tokenizer with the TensorFlow model of the same checkpoint.

    Both objects are loaded once in ``__init__`` and reused by ``encode``.
    """

    def __init__(
            self,
            model: str = SEMANTIC_MODEL):
        """Load the tokenizer and the model.

        Args:
            model: Checkpoint name or path forwarded unchanged to both
                ``AutoTokenizer.from_pretrained`` and
                ``TFAutoModel.from_pretrained``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model)
        self.model = TFAutoModel.from_pretrained(model)

    def _encode(self, text: str):
        """Tokenize a single string and run the model on it.

        Args:
            text: One input string.

        Returns:
            Whatever ``self.model`` returns for the tokenized input; no
            pooling or other post-processing is applied here.
        """
        text_tensor = self.tokenizer.encode(text, return_tensors="tf")
        return self.model(text_tensor)

    def encode(self, text: typing.Union[str, typing.Iterable[str]]) -> np.ndarray:
        """Run the model on every string in ``text``.

        Args:
            text: An iterable of strings, or a single string. A bare string
                is treated as a batch of one; without that guard the loop
                below would walk over its characters and encode each
                character separately.

        Returns:
            ``np.array`` built from the per-string results of ``_encode``,
            one entry per input string, in input order.

        NOTE(review): the entries are the raw model outputs, not pooled
        sentence vectors — confirm whether a pooling step is intended.
        """
        if isinstance(text, str):
            # A str is itself iterable; wrap it so it counts as one input.
            text = [text]
        return np.array([self._encode(string) for string in text])

    

In [41]:
class TFSentenceTransformer(tf.keras.layers.Layer):
    """Keras layer that delegates to a pretrained ``TFAutoModel``."""

    def __init__(self, model_name_or_path, **kwargs):
        """Build the layer and load the wrapped model.

        Args:
            model_name_or_path: Checkpoint identifier passed to
                ``TFAutoModel.from_pretrained``.
            **kwargs: Forwarded to ``TFAutoModel.from_pretrained`` only;
                the Keras base class is initialised without arguments.
        """
        super().__init__()
        # Load the underlying transformers model.
        self.model = TFAutoModel.from_pretrained(model_name_or_path, **kwargs)

    def call(self, inputs):
        """Forward ``inputs`` to the wrapped model and return its output as is."""
        return self.model(inputs)

In [39]:
# Build the wrapper for the notebook's checkpoint; this loads both tokenizer and model.
transformer = Transformer(model=SEMANTIC_MODEL)

All model checkpoint layers were used when initializing TFBertModel.

All the layers of TFBertModel were initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [34]:
# Sanity check: encoding two strings should give back two entries.
len(transformer.encode(['Daniel', 'Perez']))

2

In [45]:
# The wrapped model consumes tokenized tensors, not raw strings, so tokenize first.
sample_tokenizer = AutoTokenizer.from_pretrained(SEMANTIC_MODEL)
sample_inputs = sample_tokenizer(['Daniel'], padding=True, truncation=True, return_tensors='tf')

# Invoke the layer itself rather than `.call()` so the Keras `__call__` machinery runs;
# a plain dict is passed so the inputs are an ordinary Python mapping.
TFSentenceTransformer(SEMANTIC_MODEL)(dict(sample_inputs))

All model checkpoint layers were used when initializing TFBertModel.

All the layers of TFBertModel were initialized from the model checkpoint at sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.
