# Usage example in Google Colab

In [None]:
# Install the notebook's dependencies; transformers is pinned to 4.41.2,
# pymupdf and chromadb are installed unpinned.
! pip install transformers==4.41.2 pymupdf chromadb

In [None]:
# Delete any existing multimodal-rag-helper directory in the current working
# directory (presumably so a later `git clone` into the same name starts clean).
! rm -rf multimodal-rag-helper

In [None]:
# Clone the helper repository into ./multimodal-rag-helper.
# NOTE(review): later cells reference /content/multimodal-rag-helper, so this
# assumes the working directory is /content (the Colab default) - confirm.
! git clone https://github.com/danielemansillo/multimodal-rag-helper.git

In [None]:
import sys

# Make the modules in the cloned repository importable from this notebook.
# The membership check keeps sys.path free of duplicate entries when the
# cell is executed more than once.
repo_dir = "/content/multimodal-rag-helper"
if repo_dir not in sys.path:
    sys.path.insert(0, repo_dir)

In [None]:
import json
from pathlib import Path
from typing import List

import chromadb
from IPython import display
from PIL import Image

from document_processing import FolderProcessor
from models import E5V_Embedder

## Initialize Folder Processor

In [None]:
# Point the processor at the presentations folder inside the cloned repository.
# Later cells iterate over `folder_processor.documents`.
folder_path: Path = Path("/content/multimodal-rag-helper/presentations/")
folder_processor = FolderProcessor(folder_path)

## Initialize Embedder

In [None]:
# Build the embedder with its constructor defaults; no argument is overridden.
e5v_embedder = E5V_Embedder()

## Initialize ChromaDB

In [None]:
# Open a persistent Chroma client that stores its data under the relative
# path "chromadb".
client = chromadb.PersistentClient(path="chromadb")

# Select cosine distance for the HNSW index instead of the default (l2).
collection_metadata = {"hnsw:space": "cosine"}

# Reuse the collection if it already exists, otherwise create it. The same
# collection holds both the text and the image embeddings.
collection = client.get_or_create_collection(
    name="my_collection",
    metadata=collection_metadata,
)

## Create and save the embeddings in Chroma

In [None]:
# Embed the search query once; the retrieval cells reuse the result.
queries: List[str] = ["What are some good advices for the title slide?"]
queries_embedding_tensor = e5v_embedder.embed_texts(queries)
# Chroma is queried with plain Python lists, so convert the tensor.
queries_embedding_list = queries_embedding_tensor.tolist()

# Embed every document's images and texts, attach the embeddings to the
# document, then store the resulting records in the collection.
for document in folder_processor.documents:
    image_contents = [img.content for img in document.all_images]
    image_vectors = e5v_embedder.embed_images(image_contents, batch_size=1)

    text_contents = [txt.content for txt in document.all_texts]
    text_vectors = e5v_embedder.embed_texts(text_contents)

    document.set_image_embeddings(image_vectors)
    document.set_text_embeddings(text_vectors)

    # get_embedding_records() is indexed by "image" and "text"; each entry is
    # unpacked as keyword arguments to collection.add(). Both entries are
    # looked up before anything is written to the collection.
    records_by_type = document.get_embedding_records()
    for records in (records_by_type["image"], records_by_type["text"]):
        collection.add(**records)

## Perform search and retrieval

In [None]:
# Search the vector db three times with the first query's embedding:
# restricted to records whose "type" is "image", restricted to "text",
# and unrestricted. Each search asks for three results.
image_results = collection.query(
    query_embeddings=queries_embedding_list[0],
    n_results=3,
    where={"type": "image"},
)
text_results = collection.query(
    query_embeddings=queries_embedding_list[0],
    n_results=3,
    where={"type": "text"},
)
all_results = collection.query(
    query_embeddings=queries_embedding_list[0],
    n_results=3,
)

In [None]:
# Show the image results as images: for each hit, open the file referenced by
# its "image_path" metadata, print the metadata, then render the picture.
image_hits = image_results["metadatas"][0]
for hit_metadata in image_hits:
    picture = Image.open(hit_metadata["image_path"])
    print(hit_metadata)
    display.display(picture)

In [None]:
# Dump the raw image query results as indented JSON.
image_results_json = json.dumps(image_results, indent=2)
print(image_results_json)

In [None]:
# Dump the raw text query results as indented JSON.
text_results_json = json.dumps(text_results, indent=2)
print(text_results_json)

In [None]:
# Dump the unfiltered query results as indented JSON.
all_results_json = json.dumps(all_results, indent=2)
print(all_results_json)