### Vision RAG using [VARAG](https://github.com/adithya-s-k/VARAG)


[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/adithya-s-k/VARAG/blob/main/docs/visionRAG.ipynb)

**Requirement:** running this notebook needs a GPU runtime (a T4 at minimum).

In [None]:
# Fetch the VARAG sources and make the checkout the working directory.
# Later cells use paths relative to it (./examples/data, examples/).
# NOTE(review): re-running this cell while already inside the checkout would
# clone a second, nested VARAG directory — run it once per fresh runtime.
!git clone https://github.com/adithya-s-k/VARAG
%cd VARAG
# Show the resulting working directory as the cell output.
%pwd

In [None]:
!apt-get update && apt-get install -y && apt-get install -y poppler-utils

In [None]:
# Install the project found in the current directory in editable mode (-e).
# Assumes the working directory is the VARAG checkout entered by the clone cell.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -e .

In [None]:
import os
from getpass import getpass

import lancedb
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer

from varag.llms import OpenAI
from varag.rag import VisionRAG

# Load variables from a local .env file first. load_dotenv() does not override
# variables that already exist, so writing a placeholder key into os.environ
# beforehand (as this cell used to do) made any real key in .env be ignored.
load_dotenv()

# Never hardcode the key in the notebook. Prompt for it (input is not echoed)
# only when neither the environment nor the .env file supplied one.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

In [None]:
# Embedding model used by VisionRAG to embed images; trust_remote_code=True
# allows the model repository's custom code to run when it is loaded.
embedding_model = SentenceTransformer(
    "jinaai/jina-clip-v1",
    trust_remote_code=True,
)

# LanceDB connection that backs the index.
shared_db = lancedb.connect("~/shared_rag_db")

# Retrieval pipeline: stores/searches image embeddings in the "visionDemo" table.
vision_rag = VisionRAG(
    db=shared_db,
    table_name="visionDemo",
    image_embedding_model=embedding_model,
)

# Vision-language model client used later to answer the query from the retrieved images.
vlm = OpenAI()

In [None]:
# Options forwarded to VisionRAG.index for the sample documents.
index_options = {
    "overwrite": False,
    "recursive": False,
    "verbose": True,
}

# Index the example data directory (path is relative to the VARAG checkout).
# Kept as the last expression so the call's return value is shown as cell output.
vision_rag.index("./examples/data", **index_options)

In [None]:
query = "What is Colpali"
num_results = 5  # how many results to retrieve for the query

# Use the configured count; previously `num_results` was defined but ignored
# because the call hard-coded k=5 a second time.
results = vision_rag.search(query, k=num_results)

# Keep only the image of each result; the list is reused by the VLM cell below.
images = [result["image"] for result in results]

# Display the images, numbered from 1
for i, img in enumerate(images, 1):
    print(f"Image {i}:")
    display(img)

In [None]:
from IPython.display import Markdown, display

# Ask the vision-language model the same question, passing the retrieved
# images along with it.
response = vlm.query(query, images, max_tokens=1000)

# Render the reply as Markdown rather than plain text.
rendered_answer = Markdown(response)
display(rendered_answer)

### Run Gradio Demo

In [None]:
# Move into the examples directory; the relative path assumes the working
# directory is still the VARAG checkout entered by the clone cell.
%cd examples
# Launch the demo script. --share is passed through to the script; presumably
# it asks Gradio for a public share link — confirm in visionDemo.py.
!python visionDemo.py --share