In [None]:
import asyncio
from raganything import RAGAnything, RAGAnythingConfig
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
# Create RAGAnything configuration
config = RAGAnythingConfig(
    # Per-modality switches: images, tables and equations are all turned on.
    enable_equation_processing=True,
    enable_table_processing=True,
    enable_image_processing=True,
    # "auto" presumably lets the parser choose its own strategy -- confirm
    # against the RAGAnythingConfig documentation.
    mineru_parse_method="auto",
    # Directory handed to RAGAnything as its working directory.
    working_dir="./rag_storage",
)

# Define LLM model function
def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
    """Send a text-only completion request to the "deepseek-chat" model.

    Args:
        prompt: Prompt text forwarded to ``openai_complete_if_cache``.
        system_prompt: Optional system prompt, forwarded unchanged.
        history_messages: Optional list of earlier conversation messages.
            ``None`` (the default) is treated as an empty list.
        **kwargs: Additional keyword arguments forwarded unchanged.

    Returns:
        Whatever ``openai_complete_if_cache`` returns. The result is handed
        back without being awaited here (presumably an awaitable that the
        caller awaits -- confirm against the lightrag documentation).
    """
    # Use a fresh list per call instead of a shared mutable default argument.
    if history_messages is None:
        history_messages = []

    # NOTE(review): api_key and base_url are looked up in the enclosing scope
    # at call time; they are not assigned in this snippet and must be defined
    # before the first call.
    return openai_complete_if_cache(
        "deepseek-chat",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key=api_key,
        base_url=base_url,
        **kwargs,
    )

# Define vision model function for image processing
def vision_model_func(
    prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
):
    """Answer a prompt, attaching an image when ``image_data`` is supplied.

    Without image data the request is delegated to ``llm_model_func``.
    With image data, a chat message list is built explicitly and sent to the
    "gpt-4o" model.

    Args:
        prompt: Prompt text.
        system_prompt: Optional system prompt. In the image path it becomes a
            leading system message only when it is truthy.
        history_messages: Optional list of earlier conversation messages.
            ``None`` (the default) is treated as an empty list. It is only
            used in the text-only path; the image path always sends an empty
            history.
        image_data: Optional image payload interpolated into a
            ``data:image/jpeg;base64,...`` URL (presumably a base64-encoded
            string -- confirm against the caller).
        **kwargs: Additional keyword arguments forwarded unchanged.

    Returns:
        Whatever the underlying completion call returns; the result is not
        awaited here.
    """
    if not image_data:
        # Text-only request: normalize the history here so this function does
        # not rely on how llm_model_func treats a None history.
        history = [] if history_messages is None else history_messages
        return llm_model_func(prompt, system_prompt, history, **kwargs)

    # Build the message list without placeholder entries: a missing system
    # prompt must not leave a None element in the list sent to the API.
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append(
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_data}"
                    },
                },
            ],
        }
    )

    # The prompt and system prompt travel inside `messages`, so the positional
    # prompt is empty and system_prompt/history_messages are left blank.
    # NOTE(review): this call uses the same api_key/base_url as the
    # "deepseek-chat" text model -- confirm that endpoint also serves "gpt-4o".
    return openai_complete_if_cache(
        "gpt-4o",
        "",
        system_prompt=None,
        history_messages=[],
        messages=messages,
        api_key=api_key,
        base_url=base_url,
        **kwargs,
    )

# Define embedding function
def _embed_texts(texts):
    """Embed ``texts`` with the "text-embedding-3-large" model.

    api_key and base_url are read from the enclosing scope at call time.
    """
    return openai_embed(
        texts,
        model="text-embedding-3-large",
        api_key=api_key,
        base_url=base_url,
    )


# Wrap the embedding callable together with its declared dimension and
# maximum token size.
embedding_func = EmbeddingFunc(
    func=_embed_texts,
    embedding_dim=3072,
    max_token_size=8192,
)

# Assemble the RAGAnything instance from the configuration, the text and
# vision model callables, and the embedding function.
rag = RAGAnything(
    embedding_func=embedding_func,
    vision_model_func=vision_model_func,
    llm_model_func=llm_model_func,
    config=config,
)

# Process a document
await rag.process_document_complete(
    file_path="demo_data/Benefit_Options.pdf",
    output_dir="./output",
    parse_method="auto"
)

# Query the processed content
# Pure text query - for basic knowledge base search
text_result = await rag.aquery(
    "What are the main findings shown in the figures and tables?",
    mode="hybrid"
)
print("Text query result:", text_result)

# Multimodal query with specific multimodal content
multimodal_result = await rag.aquery_with_multimodal(
"Explain this formula and its relevance to the document content",
multimodal_content=[{
    "type": "equation",
    "latex": "P(d|q) = \\frac{P(q|d) \\cdot P(d)}{P(q)}",
    "equation_caption": "Document relevance probability"
}],
mode="hybrid"
)
print("Multimodal query result:", multimodal_result)

  func.__dict__.update(f.__dict__)
Rerank is enabled but no rerank_model_func provided. Reranking will be skipped.
OpenAI API Call Failed,
Model: gpt-4o-mini,
Params: {}, Got: Error code: 400 - {'error': {'message': 'Model Not Exist', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_request_error'}}
limit_async: Error in decorated function: Error code: 400 - {'error': {'message': 'Model Not Exist', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_request_error'}}


BadRequestError: Error code: 400 - {'error': {'message': 'Model Not Exist', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_request_error'}}