In [1]:
import os
from lightrag.llm.gemini import gemini_complete_if_cache, gemini_embed
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
from raganything import RAGAnything, RAGAnythingConfig

provider = "openai"  # "google_genai" / "openai"

# Environment variable that holds the API key for each supported provider.
API_KEY_ENV_VARS = {
    "google_genai": "GOOGLE_API_KEY",
    "openai": "OPENAI_API_KEY",
}


def resolve_api_key(provider_name):
    """Return the API key configured in the environment for ``provider_name``.

    Args:
        provider_name: One of the keys of ``API_KEY_ENV_VARS``.

    Returns:
        The value of the provider's environment variable, or ``None`` when the
        variable is not set (the ``os.getenv`` default).

    Raises:
        ValueError: If ``provider_name`` is not a supported provider.
    """
    if provider_name not in API_KEY_ENV_VARS:
        supported = ", ".join(sorted(API_KEY_ENV_VARS))
        raise ValueError(
            f"Unsupported provider {provider_name!r}; expected one of: {supported}"
        )
    return os.getenv(API_KEY_ENV_VARS[provider_name])


# Resolve the key here so that a mistyped `provider` fails in this cell with a
# clear message instead of leaving `api_key` undefined for later cells.
api_key = resolve_api_key(provider)



In [2]:
# RAGAnything settings: docling parser with the "txt" parse method, and every
# multimodal processor (image, table, equation) switched off.
config = RAGAnythingConfig(
    enable_equation_processing=False,
    enable_table_processing=False,
    enable_image_processing=False,
    parse_method="txt",  # one of: auto / ocr / txt
    parser="docling",  # document parser: mineru or docling
    working_dir="../rag_storage",
)

In [3]:
def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
    """Send ``prompt`` to the completion helper of the configured ``provider``.

    Args:
        prompt: User prompt forwarded to the provider's completion helper.
        system_prompt: Optional system prompt, forwarded unchanged.
        history_messages: Earlier messages. ``None`` is replaced by a new empty
            list on every call, so no list object is shared between calls.
        **kwargs: Extra keyword arguments forwarded unchanged.

    Returns:
        Whatever the selected ``*_complete_if_cache`` helper returns.

    Raises:
        ValueError: If the module-level ``provider`` is not one of
            ``"google_genai"`` or ``"openai"``.
    """
    if history_messages is None:
        history_messages = []

    if provider == "google_genai":
        return gemini_complete_if_cache(
            "gemini-2.5-flash",  # model name
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=api_key,
            **kwargs,
        )
    if provider == "openai":
        return openai_complete_if_cache(
            "gpt-5-mini",  # model name
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=api_key,
            **kwargs,
        )

    # Without this, an unknown provider would fall through and return None,
    # which only fails later and far away from the actual misconfiguration.
    raise ValueError(
        f"Unsupported provider {provider!r}; expected 'google_genai' or 'openai'"
    )

In [4]:
import numpy as np
from openai import AsyncOpenAI

# Embedding backend for the selected provider. Each branch wraps an async
# embedding coroutine in an EmbeddingFunc that declares its vector size.
if provider == "google_genai":
    async def gemini_embedding(texts):
        """Embed ``texts`` by delegating to ``gemini_embed``."""
        return await gemini_embed(
            texts,
            model="models/gemini-embedding-001",
            api_key=api_key,
        )

    embedding_func = EmbeddingFunc(
        # NOTE(review): confirm 768 is the vector size gemini_embed returns
        # for this model when called with the arguments above.
        embedding_dim=768,
        max_token_size=8192,
        func=gemini_embedding,
    )
elif provider == "openai":
    # Model name and its vector size are kept together so the declared
    # EmbeddingFunc dimension and the empty-batch shape cannot drift apart.
    OPENAI_EMBEDDING_MODEL = "text-embedding-3-large"
    OPENAI_EMBEDDING_DIM = 3072

    client = AsyncOpenAI(api_key=api_key)

    async def openai_embedding(texts):
        """Embed one string or a batch of strings with the OpenAI client.

        Args:
            texts: A single string, or a sized collection of strings.

        Returns:
            A float32 NumPy array: 1-D for a single string, otherwise 2-D with
            one row per input, ordered by the ``index`` of each response item.

        Raises:
            ValueError: If the number of returned vectors differs from the
                number of inputs.
        """
        single_input = isinstance(texts, str)
        if single_input:
            texts = [texts]

        if len(texts) == 0:
            # Nothing to embed: skip the API call and return an empty batch.
            return np.empty((0, OPENAI_EMBEDDING_DIM), dtype=np.float32)

        resp = await client.embeddings.create(
            model=OPENAI_EMBEDDING_MODEL,
            input=texts,
        )

        # Order rows by the response item's `index` rather than relying on the
        # order in which the items happen to be listed.
        data_sorted = sorted(resp.data, key=lambda r: r.index)
        matrix = np.asarray([row.embedding for row in data_sorted], dtype=np.float32)

        if matrix.shape[0] != len(texts):
            raise ValueError(
                f"Vector count mismatch: expected {len(texts)} vectors but got {matrix.shape[0]} vectors."
            )

        # Return 1D for single text, 2D for batch
        return matrix[0] if single_input else matrix

    embedding_func = EmbeddingFunc(
        embedding_dim=OPENAI_EMBEDDING_DIM,
        max_token_size=8192,
        func=openai_embedding,
    )
else:
    # Without this branch `embedding_func` would stay undefined and the next
    # cell would fail with an unrelated-looking NameError.
    raise ValueError(
        f"Unsupported provider {provider!r}; expected 'google_genai' or 'openai'"
    )

In [5]:
# Wire the parser configuration, the LLM callable and the embedding function
# into a single RAGAnything instance used by the cells below.
rag = RAGAnything(
    embedding_func=embedding_func,
    llm_model_func=llm_model_func,
    config=config,
)

INFO: RAGAnything initialized with config:
INFO:   Working directory: ../rag_storage
INFO:   Parser: docling
INFO:   Parse method: txt
INFO:   Multimodal processing - Image: False, Table: False, Equation: False
INFO:   Max concurrent files: 1


In [6]:
file_path = "C:\\Dev\\EPO_Patent_PDFs\\EP11869524NWA1.pdf"
# file_path = "..\\pdf\\returul_unui_produs.pdf"

await rag.process_document_complete(file_path, output_dir="../output")

INFO: Parser 'docling' installation verified
INFO: Initializing LightRAG with parameters: {'working_dir': '../rag_storage'}
INFO: [] Loaded graph from ../rag_storage\graph_chunk_entity_relation.graphml with 23 nodes, 25 edges
INFO:nano-vectordb:Load (23, 3072) data
INFO:nano-vectordb:Init {'embedding_dim': 3072, 'metric': 'cosine', 'storage_file': '../rag_storage\\vdb_entities.json'} 23 data
INFO:nano-vectordb:Load (25, 3072) data
INFO:nano-vectordb:Init {'embedding_dim': 3072, 'metric': 'cosine', 'storage_file': '../rag_storage\\vdb_relationships.json'} 25 data
INFO:nano-vectordb:Load (1, 3072) data
INFO:nano-vectordb:Init {'embedding_dim': 3072, 'metric': 'cosine', 'storage_file': '../rag_storage\\vdb_chunks.json'} 1 data
INFO: [] Process 23236 KV load full_docs with 1 records
INFO: [] Process 23236 KV load text_chunks with 1 records
INFO: [] Process 23236 KV load full_entities with 1 records
INFO: [] Process 23236 KV load full_relations with 1 records
INFO: [] Process 23236 KV load 

In [11]:
answer = await rag.aquery("Cum vor fi rambursati banii?")
print(answer)

INFO: Executing text query: Cum vor fi rambursati banii?...
INFO: Query mode: mix
INFO:  == LLM cache == saving: mix:keywords:0bf408b51c70a374a32852272f2c2b72
INFO: Query nodes: Refund method, Bank transfer, Credit card refund, Processing time, Transaction ID, Refund amount (top_k:40, cosine:0.2)
INFO: Local query: 21 entites, 25 relations
INFO: Query edges: Refund process, Money reimbursement, Refund policy, Refund timeline (top_k:40, cosine:0.2)
INFO: Global query: 23 entites, 25 relations
INFO: Naive query: 1 chunks (chunk_top_k:20 cosine:0.2)
INFO: Raw search results: 23 entities, 25 relations, 1 vector chunks
INFO: After truncation: 23 entities, 25 relations
INFO: Selecting 1 from 1 entity-related chunks by vector similarity
INFO: Find no additional relations-related chunks from 25 relations
INFO: Round-robin merged chunks: 2 -> 1 (deduplicated 1)
INFO: Final context: 23 entities, 25 relations, 1 chunks
INFO: Final chunks S+F/O: E23/1
INFO:  == LLM cache == saving: mix:query:160c0

### Rambursarea banilor

- **Metoda de rambursare:** Suma va fi restituită, în mod normal, prin aceeași metodă de plată folosită la plasarea comenzii. Dacă ai solicitat altfel, rambursarea se poate face conform cererii tale.  
- **Plăți online cu cardul:** În cazul plăților efectuate online cu cardul, suma este returnată direct în contul bancar asociat cardului utilizat.  
- **Termen:** Contravaloarea produsului returnat va fi rambursată în termen de **14 zile calendaristice** de la confirmarea aprobării returului.  
- **Confirmarea aprobării returului:** Rambursarea este inițiată după ce returul este aprobat (după verificarea produselor de către echipă, conform procedurii de retur).

### References

- [1] returul_unui_produs.pdf
