In [1]:
import os
from lightrag.llm.gemini import gemini_complete_if_cache, gemini_embed
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
from raganything import RAGAnything, RAGAnythingConfig

provider = "openai"  # "google_genai" / "openai"

if provider == "google_genai":
    api_key = os.getenv("GOOGLE_API_KEY")
elif provider == "openai":
    api_key = os.getenv("OPENAI_API_KEY")



In [2]:
config = RAGAnythingConfig(
    working_dir="../rag_storage",
    parser="docling",  # document parser (mineru or docling)
    parse_method="txt",  # auto/ocr/txt
    enable_image_processing=False,
    enable_table_processing=False,
    enable_equation_processing=False,
)

In [None]:
def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
    if history_messages is None:
        history_messages = []

    if provider == "google_genai":
        return gemini_complete_if_cache(
            "gemini-2.5-flash",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=api_key,
            **kwargs,
        )
    elif provider == "openai":
        return openai_complete_if_cache(
            "gpt-5-mini",  # model name
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=api_key,
            **kwargs,
        )

In [4]:
if provider == "google_genai":
    async def gemini_embedding(texts):
        return await gemini_embed(
            texts,
            model="models/gemini-embedding-001",
            api_key=api_key,
        )

    embedding_func = EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=gemini_embedding,
    )
elif provider == "openai":
    async def openai_embedding(texts):
        return await openai_embed(
            texts,
            model="text-embedding-3-large",
            api_key=api_key,
        )

    embedding_func = EmbeddingFunc(
        embedding_dim=3072,
        max_token_size=8192,
        func=openai_embedding,
    )

In [5]:
rag = RAGAnything(
    config=config,
    llm_model_func=llm_model_func,
    embedding_func=embedding_func,
)

INFO: RAGAnything initialized with config:
INFO:   Working directory: ../rag_storage
INFO:   Parser: docling
INFO:   Parse method: txt
INFO:   Multimodal processing - Image: False, Table: False, Equation: False
INFO:   Max concurrent files: 1


In [None]:
await rag.process_document_complete(
    file_path="C:\\Dev\\EPO_Patent_PDFs\\EP11869524NWA1.pdf", output_dir="../output"
)

INFO: Parser 'docling' installation verified
INFO: Initializing LightRAG with parameters: {'working_dir': '../rag_storage'}
INFO: [] Created new empty graph file: ../rag_storage\graph_chunk_entity_relation.graphml
INFO:nano-vectordb:Init {'embedding_dim': 3072, 'metric': 'cosine', 'storage_file': '../rag_storage\\vdb_entities.json'} 0 data
INFO:nano-vectordb:Init {'embedding_dim': 3072, 'metric': 'cosine', 'storage_file': '../rag_storage\\vdb_relationships.json'} 0 data
INFO:nano-vectordb:Init {'embedding_dim': 3072, 'metric': 'cosine', 'storage_file': '../rag_storage\\vdb_chunks.json'} 0 data
INFO: [] Process 18364 KV load full_docs with 0 records
INFO: [] Process 18364 KV load text_chunks with 0 records
INFO: [] Process 18364 KV load full_entities with 0 records
INFO: [] Process 18364 KV load full_relations with 0 records
INFO: [] Process 18364 KV load entity_chunks with 0 records
INFO: [] Process 18364 KV load relation_chunks with 0 records
INFO: [] Process 18364 KV load llm_respons

ERROR: Embedding func: Error in decorated function for task 1880395524320_38151.906: Vector count mismatch: expected 10 vectors but got 20 vectors (from embedding result).
ERROR: Embedding func: Error in decorated function for task 1880395523280_38151.921: Vector count mismatch: expected 10 vectors but got 20 vectors (from embedding result).
