# Retrieval with FastEmbed

This notebook demonstrates basic usage of FastEmbed for vector search and retrieval, followed by a small MTEB benchmark comparison.

In [1]:
from typing import List
import numpy as np
from fastembed.embedding import FlagEmbedding as Embedding

## Data Preparation
💡 Tip: Prefer `query_embed` for queries and `passage_embed` for documents.

In [2]:
# Ten short facts about Maharana Pratap that serve as the retrieval corpus
documents: List[str] = [
    "Maharana Pratap was a Rajput warrior king from Mewar",
    "He fought against the Mughal Empire led by Akbar",
    "The Battle of Haldighati in 1576 was his most famous battle",
    "He refused to submit to Akbar and continued guerrilla warfare",
    "His capital was Chittorgarh, which he lost to the Mughals",
    "He died in 1597 at the age of 57",
    "Maharana Pratap is considered a symbol of Rajput resistance against foreign rule",
    "His legacy is celebrated in Rajasthan through festivals and monuments",
    "He had 11 wives and 17 sons, including Amar Singh I who succeeded him as ruler of Mewar",
    "His life has been depicted in various films, TV shows, and books",
]

# Build the embedding model (FlagEmbedding, imported as Embedding) for bge-small-en
embedding_model = Embedding(
    model_name="BAAI/bge-small-en",
    max_length=512,
)

# passage_embed returns a generator, so collect its vectors into a concrete list
embeddings: List[np.ndarray] = [
    passage_vector for passage_vector in embedding_model.passage_embed(documents)
]

# Show the shape of a single vector and how many vectors were produced
print(embeddings[0].shape, len(embeddings))

(384,) 10


## Querying

In [3]:
%%time

query = "Who was Maharana Pratap?"
query_embedding = list(embedding_model.query_embed(query))[0]
plain_query_embedding = list(embedding_model.embed(query))[0]

query_embedding.shape, plain_query_embedding.shape

CPU times: user 45 ms, sys: 2.5 ms, total: 47.5 ms
Wall time: 29.6 ms


((384,), (384,))

In [4]:
def print_top_k(query_embedding, embeddings, documents, k=5):
    """Print the ``k`` documents whose embeddings score highest against the query.

    Scores are plain dot products, which equal cosine similarity only when the
    vectors are unit-normalized.

    Args:
        query_embedding: 1-D vector for the query.
        embeddings: Sequence of document vectors, one per entry in ``documents``
            and in the same order.
        documents: Document texts to print.
        k: Maximum number of results to print. Values larger than the number
            of scored documents are clamped instead of raising ``IndexError``.
    """
    # Nothing to rank; also avoids a shape error from np.dot on an empty input
    if len(embeddings) == 0:
        return

    # Dot product of every document vector with the query vector
    scores = np.dot(embeddings, query_embedding)
    # Document indices ordered from highest to lowest score
    ranked_indices = np.argsort(scores)[::-1]
    # Never ask for more results than there are scored documents
    top_k = min(k, len(ranked_indices))
    for rank in range(top_k):
        print(f"Rank {rank + 1}: {documents[ranked_indices[rank]]}")

In [5]:
# Sanity check of np.dot on plain lists: 1 * 4 + 2 * 5 + 3 * 6 = 32
np.dot([1,2,3], [4,5,6])

32

In [6]:
# argmax returns the position of the largest value (0.5 sits at index 2)
np.argmax([0.1, 0.2, 0.5, 0.3])

2

In [7]:
# argsort returns indices in ascending order of value, so the best score comes last;
# print_top_k reverses this ordering to rank from highest to lowest
np.argsort([0.1, 0.2, 0.5, 0.3])

array([0, 1, 3, 2])

In [8]:
# Rank the documents against the vector produced by query_embed
print_top_k(query_embedding, embeddings, documents)

Rank 1: Maharana Pratap was a Rajput warrior king from Mewar
Rank 2: Maharana Pratap is considered a symbol of Rajput resistance against foreign rule
Rank 3: His legacy is celebrated in Rajasthan through festivals and monuments
Rank 4: He had 11 wives and 17 sons, including Amar Singh I who succeeded him as ruler of Mewar
Rank 5: He fought against the Mughal Empire led by Akbar


In [9]:
# Same ranking, but using the vector produced by the generic embed method
print_top_k(plain_query_embedding, embeddings, documents)

Rank 1: Maharana Pratap was a Rajput warrior king from Mewar
Rank 2: Maharana Pratap is considered a symbol of Rajput resistance against foreign rule
Rank 3: His legacy is celebrated in Rajasthan through festivals and monuments
Rank 4: He had 11 wives and 17 sons, including Amar Singh I who succeeded him as ruler of Mewar
Rank 5: He fought against the Mughal Empire led by Akbar


In [10]:
# Dot product between the query_embed and embed vectors of the same query
np.dot(query_embedding, plain_query_embedding)

0.95818377

## Evaluation

In [11]:
from mteb import MTEB

# Display the names of the tasks MTEB exposes; the evaluation cells below pick one of them
MTEB().available_tasks

  from .autonotebook import tqdm as notebook_tqdm


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


['BUCC',
 'BornholmBitextMining',
 'Tatoeba',
 'AmazonCounterfactualClassification',
 'AmazonPolarityClassification',
 'AmazonReviewsClassification',
 'AngryTweetsClassification',
 'Banking77Classification',
 'DalajClassification',
 'DanishPoliticalCommentsClassification',
 'DKHateClassification',
 'EmotionClassification',
 'ImdbClassification',
 'LccSentimentClassification',
 'MassiveIntentClassification',
 'MassiveScenarioClassification',
 'MTOPDomainClassification',
 'MTOPIntentClassification',
 'NoRecClassification',
 'NordicLangClassification',
 'NorwegianParliament',
 'ScalaDaClassification',
 'ScalaNbClassification',
 'ScalaNbClassification',
 'ScalaSvClassification',
 'SweRecClassification',
 'ToxicConversationsClassification',
 'TweetSentimentExtractionClassification',
 'TNews',
 'IFlyTek',
 'MultilingualSentiment',
 'JDReview',
 'OnlineShopping',
 'Waimai',
 'CBD',
 'PolEmo2.0-IN',
 'PolEmo2.0-OUT',
 'AllegroReviews',
 'PAC',
 'ArxivClusteringP2P',
 'ArxivClusteringS2S',
 'Bi

In [28]:
from sentence_transformers import SentenceTransformer

# Load the bge-small-en checkpoint through sentence-transformers for benchmarking
model_name = "BAAI/bge-small-en"
model = SentenceTransformer(model_name)

# Run a single MTEB task; output_folder is keyed by model name, so results land
# under results/BAAI/bge-small-en/
evaluation = MTEB(tasks=["Banking77Classification"])
results = evaluation.run(model, output_folder=f"results/{model_name}")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [15]:
# Repeat the same Banking77Classification run with all-MiniLM-L6-v2 for comparison.
# NOTE: relies on SentenceTransformer and MTEB having been imported by earlier cells,
# and rebinds model_name, model, evaluation and results.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(model_name)

evaluation = MTEB(tasks=["Banking77Classification"])
results = evaluation.run(model, output_folder=f"results/{model_name}")

In [30]:
import json

# Print the saved Banking77Classification scores of both evaluated models, one
# after the other, so they can be compared. Each file is announced by its path
# and then pretty-printed.
for evaluated_model in ("BAAI/bge-small-en", "sentence-transformers/all-MiniLM-L6-v2"):
    # Explicit encoding keeps the read independent of the platform's locale default
    with open(
        f"results/{evaluated_model}/Banking77Classification.json", encoding="utf-8"
    ) as f:
        print(f.name)
        print(json.dumps(json.load(f), indent=2))

results/BAAI/bge-small-en/Banking77Classification.json
{
  "dataset_revision": "0fd18e25b25c072e09e0d92ab615fda904d66300",
  "mteb_dataset_name": "Banking77Classification",
  "mteb_version": "1.1.1",
  "test": {
    "accuracy": 0.795422077922078,
    "accuracy_stderr": 0.006097416428071673,
    "evaluation_time": 131.81,
    "f1": 0.7850062896493155,
    "f1_stderr": 0.0068895723757598165,
    "main_score": 0.795422077922078
  }
}
results/sentence-transformers/all-MiniLM-L6-v2/Banking77Classification.json
{
  "dataset_revision": "0fd18e25b25c072e09e0d92ab615fda904d66300",
  "mteb_dataset_name": "Banking77Classification",
  "mteb_version": "1.1.1",
  "test": {
    "accuracy": 0.8006168831168832,
    "accuracy_stderr": 0.00782656117814138,
    "evaluation_time": 73.43,
    "f1": 0.7940375254626659,
    "f1_stderr": 0.00891542230698871,
    "main_score": 0.8006168831168832
  }
}
