In [4]:
from sentence_transformers import SentenceTransformer
import time

# Embed one query with the SentenceTransformer wrapper and time the whole step.
# The timed span covers BOTH constructing the model and encoding the query, so
# the reported figure depends heavily on whether the weights are already cached.
query = "Find me a flowy trails with some jumps"
# NOTE(review): short model name; presumably resolves to the same checkpoint as
# "sentence-transformers/stsb-xlm-r-multilingual" -- confirm against the Hub.
model_id = "stsb-xlm-r-multilingual"

# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
model = SentenceTransformer(model_id)
embeddings = model.encode(query)
end = time.perf_counter()
total = end - start

print(f"Sentence transformer time = {total}")
print("Embeddings:")
print(embeddings)
print("\n")

Sentence transformer time = 0.8429811000823975
Embeddings:
[ 3.43843997e-01 -6.49826050e-01  7.12332368e-01  8.89931977e-01
  2.07846388e-01 -5.14178157e-01 -1.74151342e-02 -2.01479420e-01
  3.88172209e-01 -7.90236890e-02 -9.82687026e-02  4.79609996e-01
  1.96560532e-01 -9.28486288e-02  4.76859421e-01 -7.61418462e-01
 -2.58376539e-01 -3.74922097e-01  7.50732064e-01 -4.09680218e-01
 -8.03902224e-02 -5.61707795e-01  6.56428412e-02  1.70710698e-01
 -2.36059517e-01  1.35743972e-02 -5.83963841e-02 -1.82366446e-01
 -8.12228560e-01 -1.18710123e-01 -9.44830105e-02  7.76519477e-02
  8.26527536e-01 -2.48041764e-01 -4.40473258e-01 -3.53244618e-02
  2.91002780e-01 -5.90482533e-01  1.31753027e-01 -1.15120387e+00
 -1.50210992e-01  3.88493955e-01 -3.42855126e-01 -4.57819313e-01
 -8.08842003e-01  1.05897352e-01 -5.59503615e-01  3.55086803e-01
 -9.18948710e-01  2.60473877e-01 -5.68579853e-01  5.66671371e-01
 -6.13074839e-01  1.11307897e-01 -1.62532657e-01  5.50412297e-01
 -2.62184441e-01 -4.56389397e-0

In [3]:
from transformers import AutoTokenizer, AutoModel
import torch

#Mean pooling - take attention mask into consideration
def mean_pooling(model_output, attention_mask):
    """Average token embeddings along axis 1, counting only unmasked positions.

    Args:
        model_output: Indexable model result whose element 0 holds the token
            embeddings (assumed layout (batch, tokens, hidden) -- TODO confirm).
        attention_mask: Mask with one entry per token; it is given a trailing
            axis and expanded to the size of the token embeddings.

    Returns:
        The mask-weighted sum of the token embeddings over axis 1 divided by
        the per-position mask total over axis 1.
    """
    hidden_states = model_output[0]
    # Stretch the mask over the trailing axis so it lines up element-wise with
    # the embeddings, then cast to float for the arithmetic below.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    weighted_total = torch.sum(hidden_states * mask, 1)
    # The lower bound keeps the division finite when a row's mask is all zeros.
    mask_total = torch.clamp(mask.sum(1), min=1e-9)
    return weighted_total / mask_total

# Same query embedded with the raw Hugging Face classes plus manual mean pooling,
# timed end to end for comparison with the SentenceTransformer wrapper.
query = "Find me a flowy trails with some jumps"
model_id = "sentence-transformers/stsb-xlm-r-multilingual"

# Load the pretrained tokenizer and model from the Hugging Face checkpoint.
# The timer starts BEFORE loading, so the reported total also includes fetching
# the checkpoint files whenever they are not already available locally.
# perf_counter() is monotonic, which makes it the right clock for intervals.
start = time.perf_counter()
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)

# Tokenize the query into PyTorch tensors.
encoded_input = tokenizer(query, padding=True, truncation=True, return_tensors='pt')

# Run the forward pass without gradient tracking (inference only).
with torch.no_grad():
    model_output = model(**encoded_input)

# Collapse the per-token embeddings into a single sentence embedding.
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
end = time.perf_counter()
total = end - start

print(f"Total time = {total}")
print("Sentence embeddings:")
print(sentence_embeddings)
print("\n")

tokenizer_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/709 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Total time = 50.466275215148926
Sentence embeddings:
tensor([[ 3.4384e-01, -6.4983e-01,  7.1233e-01,  8.8993e-01,  2.0785e-01,
         -5.1418e-01, -1.7415e-02, -2.0148e-01,  3.8817e-01, -7.9024e-02,
         -9.8269e-02,  4.7961e-01,  1.9656e-01, -9.2849e-02,  4.7686e-01,
         -7.6142e-01, -2.5838e-01, -3.7492e-01,  7.5073e-01, -4.0968e-01,
         -8.0390e-02, -5.6171e-01,  6.5643e-02,  1.7071e-01, -2.3606e-01,
          1.3574e-02, -5.8396e-02, -1.8237e-01, -8.1223e-01, -1.1871e-01,
         -9.4483e-02,  7.7652e-02,  8.2653e-01, -2.4804e-01, -4.4047e-01,
         -3.5324e-02,  2.9100e-01, -5.9048e-01,  1.3175e-01, -1.1512e+00,
         -1.5021e-01,  3.8849e-01, -3.4286e-01, -4.5782e-01, -8.0884e-01,
          1.0590e-01, -5.5950e-01,  3.5509e-01, -9.1895e-01,  2.6047e-01,
         -5.6858e-01,  5.6667e-01, -6.1307e-01,  1.1131e-01, -1.6253e-01,
          5.5041e-01, -2.6218e-01, -4.5639e-01, -4.0769e-01,  6.6431e-01,
          5.5015e-01, -5.0814e-01,  1.3715e-01,  5.8066e-01