# Reranking with fastembed


Reference: https://qdrant.tech/documentation/fastembed/fastembed-rerankers/

In [7]:
from fastembed import TextEmbedding
from fastembed.rerank.cross_encoder import TextCrossEncoder

In [9]:
# Names of every cross-encoder reranker that fastembed reports as supported.
[entry["model"] for entry in TextCrossEncoder.list_supported_models()]

['Xenova/ms-marco-MiniLM-L-6-v2',
 'Xenova/ms-marco-MiniLM-L-12-v2',
 'BAAI/bge-reranker-base',
 'jinaai/jina-reranker-v1-tiny-en',
 'jinaai/jina-reranker-v1-turbo-en',
 'jinaai/jina-reranker-v2-base-multilingual']

In [12]:
# First stage: dense embedding model used for both the documents and the query.
dense_embedding_model = TextEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
# Second stage: cross-encoder used to re-score the retrieved documents
# against the query.
reranker = TextCrossEncoder(
    model_name="jinaai/jina-reranker-v2-base-multilingual"
)

## Embed and index data for the first-stage retrieval

In [11]:
# Toy corpus: 20 short movie plot summaries. Each one is embedded, stored in
# the "movies" collection, and later used as a candidate for reranking.
descriptions = [
    "In 1431, Jeanne d'Arc is placed on trial on charges of heresy. The ecclesiastical jurists attempt to force Jeanne to recant her claims of holy visions.",
    "A film projectionist longs to be a detective, and puts his meagre skills to work when he is framed by a rival for stealing his girlfriend's father's pocketwatch.",
    "A group of high-end professional thieves start to feel the heat from the LAPD when they unknowingly leave a clue at their latest heist.",
    "A petty thief with an utter resemblance to a samurai warlord is hired as the lord's double. When the warlord later dies the thief is forced to take up arms in his place.",
    "A young boy named Kubo must locate a magical suit of armour worn by his late father in order to defeat a vengeful spirit from the past.",
    "A biopic detailing the 2 decades that Punjabi Sikh revolutionary Udham Singh spent planning the assassination of the man responsible for the Jallianwala Bagh massacre.",
    "When a machine that allows therapists to enter their patients' dreams is stolen, all hell breaks loose. Only a young female therapist, Paprika, can stop it.",
    "An ordinary word processor has the worst night of his life after he agrees to visit a girl in Soho whom he met that evening at a coffee shop.",
    "A story that revolves around drug abuse in the affluent north Indian State of Punjab and how the youth there have succumbed to it en-masse resulting in a socio-economic decline.",
    "A world-weary political journalist picks up the story of a woman's search for her son, who was taken away from her decades ago after she became pregnant and was forced to live in a convent.",
    "Concurrent theatrical ending of the TV series Neon Genesis Evangelion (1995).",
    "During World War II, a rebellious U.S. Army Major is assigned a dozen convicted murderers to train and lead them into a mass assassination mission of German officers.",
    "The toys are mistakenly delivered to a day-care center instead of the attic right before Andy leaves for college, and it's up to Woody to convince the other toys that they weren't abandoned and to return home.",
    "A soldier fighting aliens gets to relive the same day over and over again, the day restarting every time he dies.",
    "After two male musicians witness a mob hit, they flee the state in an all-female band disguised as women, but further complications set in.",
    "Exiled into the dangerous forest by her wicked stepmother, a princess is rescued by seven dwarf miners who make her part of their household.",
    "A renegade reporter trailing a young runaway heiress for a big story joins her on a bus heading from Florida to New York, and they end up stuck with each other when the bus leaves them behind at one of the stops.",
    "Story of 40-man Turkish task force who must defend a relay station.",
    "Spinal Tap, one of England's loudest bands, is chronicled by film director Marty DiBergi on what proves to be a fateful tour.",
    "Oskar, an overlooked and bullied boy, finds love and revenge through Eli, a beautiful but peculiar girl.",
]

In [16]:
# embed() returns an iterable of vectors; list() materialises it so the
# embeddings can be indexed and reused when uploading points.
descriptions_embeddings = list(dense_embedding_model.embed(descriptions))
# Length of one vector, i.e. the embedding dimensionality of the model.
len(descriptions_embeddings[0])

384

In [14]:
from qdrant_client import QdrantClient, models

# ":memory:" runs Qdrant from RAM; presumably nothing is persisted once the
# session ends.
qdrant_client = QdrantClient(":memory:")

In [18]:
# Create the "movies" collection with one named vector, "embedding". The same
# name is used as the vector key when uploading points and as `using=` when
# querying.
qdrant_client.create_collection(
    collection_name="movies",
    vectors_config={
        "embedding": models.VectorParams(
            # Take the dimensionality from the embeddings already computed
            # (384 for all-MiniLM-L6-v2) instead of hard-coding it, so the
            # collection cannot drift out of sync with the embedding model.
            size=len(descriptions_embeddings[0]),
            distance=models.Distance.COSINE,
        )
    },
)

True

In [19]:
# Pair every description with its embedding. The position in the corpus
# doubles as the point id, and the text is kept in the payload so it can be
# returned with search hits.
movie_points = [
    models.PointStruct(
        id=point_id,
        payload={"description": text},
        vector={"embedding": embedding},
    )
    for point_id, (text, embedding) in enumerate(
        zip(descriptions, descriptions_embeddings)
    )
]
qdrant_client.upload_points(collection_name="movies", points=movie_points)

## First-stage retrieval

In [20]:
query = "A story about a strong historically significant female figure."
# query_embed() returns an iterable of vectors; the first element is used as
# the embedding of this query string.
query_vectors = list(dense_embedding_model.query_embed(query))
query_embedded = query_vectors[0]

# First stage: fetch the 10 nearest points on the "embedding" vector and
# include payloads so the descriptions are available for reranking.
initial_retrieval = qdrant_client.query_points(
    collection_name="movies",
    query=query_embedded,
    using="embedding",
    limit=10,
    with_payload=True,
)

In [21]:
# Print the first-stage hits in retrieval order and collect their descriptions
# so they can be passed to the reranker.
description_hits = []
for i, hit in enumerate(initial_retrieval.points):
    # Read the payload field once, outside the f-string: reusing the outer
    # quote character inside an f-string expression is a SyntaxError before
    # Python 3.12 (PEP 701).
    description = hit.payload["description"]
    print(f'Result number {i+1} is "{description}"')
    description_hits.append(description)

Result number 1 is "A world-weary political journalist picks up the story of a woman's search for her son, who was taken away from her decades ago after she became pregnant and was forced to live in a convent."
Result number 2 is "Exiled into the dangerous forest by her wicked stepmother, a princess is rescued by seven dwarf miners who make her part of their household."
Result number 3 is "Oskar, an overlooked and bullied boy, finds love and revenge through Eli, a beautiful but peculiar girl."
Result number 4 is "A renegade reporter trailing a young runaway heiress for a big story joins her on a bus heading from Florida to New York, and they end up stuck with each other when the bus leaves them behind at one of the stops."
Result number 5 is "A story that revolves around drug abuse in the affluent north Indian State of Punjab and how the youth there have succumbed to it en-masse resulting in a socio-economic decline."
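Result number 6 is "After two male musicians witness a mob hit, they flee the state in an all-female band disguised as women, but further complications set in."
[... output truncated: results 7-10 not shown ...]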

In [22]:
# Score each first-stage hit against the query with the cross-encoder.
# list() materialises the returned iterable. The scores are later matched to
# description_hits by position, so they are assumed to come back in input
# order; confirm against the fastembed docs.
new_scores = list(reranker.rerank(query, description_hits))
# Display the raw scores (the sort further down treats higher as more relevant).
new_scores

[-1.7871119976043701,
 -1.1165943145751953,
 -1.2816978693008423,
 -1.8776179552078247,
 -2.721012830734253,
 -2.2455098628997803,
 -2.380913257598877,
 -2.8468973636627197,
 -2.8494515419006348,
 -0.6084094047546387]

In [24]:
# Pair each score with its position in description_hits, then order the pairs
# from highest to lowest score.
ranking = sorted(enumerate(new_scores), key=lambda pair: pair[1], reverse=True)

# Print the hits again, this time in reranked order.
for position, (hit_index, _score) in enumerate(ranking, start=1):
    print(f'Reranked result number {position} is "{description_hits[hit_index]}"')

Reranked result number 1 is "In 1431, Jeanne d'Arc is placed on trial on charges of heresy. The ecclesiastical jurists attempt to force Jeanne to recant her claims of holy visions."
Reranked result number 2 is "Exiled into the dangerous forest by her wicked stepmother, a princess is rescued by seven dwarf miners who make her part of their household."
Reranked result number 3 is "Oskar, an overlooked and bullied boy, finds love and revenge through Eli, a beautiful but peculiar girl."
Reranked result number 4 is "A world-weary political journalist picks up the story of a woman's search for her son, who was taken away from her decades ago after she became pregnant and was forced to live in a convent."
Reranked result number 5 is "A renegade reporter trailing a young runaway heiress for a big story joins her on a bus heading from Florida to New York, and they end up stuck with each other when the bus leaves them behind at one of the stops."
Reranked result number 6 is "After two male music