# How to generate Text Embeddings with FastEmbed

In [1]:
from fastembed import TextEmbedding

In [63]:
# Small demo corpus: two sentences about FastEmbed and one unrelated sentence,
# so the similarity scores further down have something to contrast.
# `list[str]` is the proper annotation; `[str]` is a list literal, not a type.
documents: list[str] = [
    "FastEmbed is lighter than Transformers & Sentence-Transformers.",
    "FastEmbed is supported by and maintained by Qdrant.",
    "I have a cat",
]

In [64]:
# Name the model explicitly instead of relying on the library default, so the
# embeddings and scores shown below stay reproducible if that default changes.
embedding_model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
embedding_model.model_name

'BAAI/bge-small-en-v1.5'

In [65]:
# Embed every document and materialise the result into a list so the vectors
# can be indexed here and reused by later cells.
embeddings_generator = embedding_model.embed(documents)
embeddings_list = [vector for vector in embeddings_generator]

# Peek at the first 10 components of the first document's vector.
embeddings_list[0][:10]

array([-0.09479211,  0.01008398, -0.03087804,  0.02379127,  0.00236447,
        0.00065356, -0.08248352,  0.00084713,  0.03719218,  0.01438666],
      dtype=float32)

## Calculating similarity

In [66]:
import numpy as np
from qdrant_client import models
from qdrant_client.local.distances import (
    calculate_distance,
    cosine_similarity,
    dot_product,
    euclidean_distance,
    manhattan_distance,
)

In [67]:
# Pairwise functions compared side by side in the next cell.
distance_types = [
    cosine_similarity, dot_product,  # similarity scores
    euclidean_distance, manhattan_distance,  # distance metrics
]

In [68]:
# Collect the per-document vectors into a single array.
embeddings = np.array(embeddings_list)

# Score the first document against the remaining ones with each function.
first_doc, other_docs = embeddings[:1], embeddings[1:]
for distance_fn in distance_types:
    print(distance_fn.__name__, distance_fn(first_doc, other_docs))

cosine_similarity [[0.6716511 0.4618737]]
dot_product [[0.6716511 0.4618737]]
euclidean_distance [[0.810369 1.037426]]
manhattan_distance [[12.591358 16.071075]]


In [69]:
# Same comparison through the generic `calculate_distance` entry point, once
# per member of `models.Distance`. The enum can be iterated directly, so the
# intermediate `list(...)` copy is unnecessary.
for distance_type in models.Distance:
    print(
        distance_type, calculate_distance(embeddings[:1], embeddings[1:], distance_type)
    )

Cosine [[0.6716511 0.4618737]]
Euclid [[0.810369 1.037426]]
Dot [[0.6716511 0.4618737]]
Manhattan [[12.591358 16.071075]]


## Finding similarity

In [95]:
query = "I need a pet"

embeddings_generator = embedding_model.embed(query)
embeddings_list = list(embeddings_generator)

In [96]:
scores = calculate_distance(
    np.array(embeddings_list), embeddings, distance_type=models.Distance.COSINE
)
scores

array([[0.433192  , 0.39380783, 0.77847326]], dtype=float32)

In [97]:
# Pair each document with its score against the query, then rank the pairs
# from highest score to lowest.
recommendations = list(zip(documents, scores[0]))
recommendations.sort(key=lambda pair: pair[1], reverse=True)

for text, similarity in recommendations:
    print(text, similarity)

I have a cat 0.77847326
FastEmbed is lighter than Transformers & Sentence-Transformers. 0.433192
FastEmbed is supported by and maintained by Qdrant. 0.39380783
