# Advanced Retrieval 101: Multilingual and Multimodal Search with LlamaIndex

Reference: <https://qdrant.tech/documentation/multimodal-search/>

## Vectorize Data

In [19]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

import torch

# Pick the best available accelerator instead of hard-coding "mps", so the
# notebook runs unchanged on Apple Silicon, NVIDIA GPUs, and CPU-only machines.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# NOTE(review): the saved output of this cell is
# "ValueError: size must contain 'shortest_edge' and 'longest_edge' keys."
# Presumably an incompatibility between the model's processor config and the
# installed transformers version -- confirm and pin the dependency versions.
model = HuggingFaceEmbedding(
    model_name="llamaindex/vdr-2b-multi-v1",
    device=device,
    # Runs code shipped with the model repository; only enable for trusted models.
    trust_remote_code=True,
)


# (caption, image path) pairs for the demo corpus; paths are relative to the
# notebook's working directory.
_CAPTIONED_IMAGES = (
    ("An image about plane emergency safety", "images/place_emergency.jpg"),
    ("An image about airplane components.", "images/airplane_parts.png"),
    ("An image about COVID safety restrictions.", "images/coronavirus-safety.jpg"),
    ("A confidential image about UFO sightings", "images/ufo_sightings.jpeg"),
    ("An image about US stock market news", "images/us_stock_market_news.png"),
)

# Each document is a plain dict with a "caption" and an "image" key.
documents = [
    {"caption": caption, "image": image_path}
    for caption, image_path in _CAPTIONED_IMAGES
]

ValueError: size must contain 'shortest_edge' and 'longest_edge' keys.

In [None]:
# Gather the two modalities separately, keeping the order of `documents` so the
# i-th text vector and i-th image vector belong to the same document.
captions = [document["caption"] for document in documents]
image_paths = [document["image"] for document in documents]

# Embed captions and images with the same model in one batch per modality.
text_embeddings = model.get_text_embedding_batch(captions)
image_embeddings = model.get_image_embedding_batch(image_paths)

In [None]:
from qdrant_client import QdrantClient, models

# In-memory Qdrant instance: nothing is persisted between kernel restarts.
client = QdrantClient(":memory:")

collection_missing = not client.collection_exists("llama-multi")
if collection_missing:
    # One named vector per modality; each one's dimensionality is taken from
    # the first embedding of that modality.
    named_vector_params = {
        vector_name: models.VectorParams(
            size=len(embeddings[0]),
            distance=models.Distance.COSINE,
        )
        for vector_name, embeddings in (
            ("image", image_embeddings),
            ("text", text_embeddings),
        )
    }
    client.create_collection(
        collection_name="llama-multi",
        vectors_config=named_vector_params,
    )

In [None]:
# Build one point per document: the list position is the point id, both
# modality vectors are stored under their names, and the document dict itself
# becomes the payload.
points = []
for point_id, document in enumerate(documents):
    named_vectors = {
        "text": text_embeddings[point_id],
        "image": image_embeddings[point_id],
    }
    points.append(
        models.PointStruct(id=point_id, vector=named_vectors, payload=document)
    )

client.upload_points(collection_name="llama-multi", points=points)

## Search

### Text-to-Image

In [None]:
from PIL import Image

# Embed the text query, then search it against the *image* vectors.
find_image = model.get_query_embedding("Bullish on US Equity")

search_result = client.query_points(
    collection_name="llama-multi",
    query=find_image,
    using="image",
    with_payload=["image"],  # only the image path is needed back
    limit=1,
)

# Open the top hit's image; as the last expression, it is rendered by the notebook.
top_hit = search_result.points[0]
Image.open(top_hit.payload["image"])

### Multilingual Search

In [None]:
# Query in German against the image vectors and display the best-matching image.
# The collection name is spelled out because `COLLECTION_NAME` is never defined
# in this notebook and would raise NameError on a fresh kernel.
Image.open(
    client.query_points(
        collection_name="llama-multi",
        # German: Tell me about the mysterious object
        query=model.get_query_embedding("erzähl mir von dem mysteriösen Objekt"),
        using="image",
        with_payload=["image"],  # only the image path is needed back
        limit=1,
    )
    .points[0]
    .payload["image"]
)