In [2]:
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model

# Load environment variables from a local .env file (presumably the provider API keys).
load_dotenv()

# Chat-model backend used by the rest of the notebook.
provider = "google_genai"  # "google_genai" / "openai"

# Both branches use the explicit "<provider>:<model>" form so that provider
# resolution never depends on init_chat_model's name-based inference.
if provider == "google_genai":
    model = init_chat_model("google_genai:gemini-2.5-flash")
elif provider == "openai":
    model = init_chat_model("openai:gpt-5-mini")
else:
    # Fail here with a clear message instead of a NameError on `model` in a later cell.
    raise ValueError(
        f"Unsupported provider {provider!r}; expected 'google_genai' or 'openai'."
    )

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from random import randint

def _ordinal(number: int) -> str:
    """Return ``number`` with its English ordinal suffix, e.g. 1st, 2nd, 3rd, 4th, 11th, 21st."""
    # 11, 12 and 13 are irregular: they take "th" despite ending in 1, 2, 3.
    if 11 <= number % 100 <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(number % 10, "th")
    return f"{number}{suffix}"


# Pick a random century (1..21) and era so each run asks about a different period.
century = randint(1, 21)
ad_bc = "AD" if randint(1, 2) == 1 else "BC"

# Smoke-test the chat model; the ordinal helper avoids prompts such as "1th century".
ai_message = model.invoke(
    f"Who's your maker? Also, tell me in a sentence about an interesting historic moment from the {_ordinal(century)} century {ad_bc}."
)

print(ai_message.content)

I am a large language model, trained by Google.

A truly pivotal moment was the deposition of the last Western Roman Emperor, Romulus Augustulus, by the Germanic chieftain Odoacer in 476 AD, which traditionally marks the end of the Western Roman Empire.


In [4]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import OpenAIEmbeddings

# Choose the embedding model that matches the chat-model provider selected earlier.
if provider == "google_genai":
    embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
elif provider == "openai":
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
else:
    # Without this branch an unknown provider would leave `embeddings` undefined
    # and only surface as a NameError when the vector store is created.
    raise ValueError(
        f"Unsupported provider {provider!r}; expected 'google_genai' or 'openai'."
    )

In [5]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory vector index backed by the embedding model selected above.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [6]:
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_docling import DoclingLoader

# Which document set to index.
document_type = "pdf"  # "markdown" / "pdf"

if document_type == "markdown":
    loader = DirectoryLoader(
        path="../api/documents/markdown",
        loader_cls=TextLoader,
        # Decode explicitly instead of relying on the platform default encoding,
        # which can garble non-ASCII text on Windows.
        # Assumes the markdown files are UTF-8 - TODO confirm.
        loader_kwargs={"encoding": "utf-8"},
    )
elif document_type == "pdf":
    loader = DirectoryLoader(
        path="../api/documents/pdf",
        loader_cls=DoclingLoader,
    )
else:
    # Fail here rather than with a NameError on `loader` below.
    raise ValueError(
        f"Unsupported document_type {document_type!r}; expected 'markdown' or 'pdf'."
    )

docs = loader.load()

# An empty result would otherwise surface as a bare IndexError on docs[0].
if not docs:
    raise ValueError(f"No documents were loaded for document_type {document_type!r}.")

# NOTE(review): for PDFs the loader appears to return pre-chunked pieces rather
# than whole files (the metadata carries a chunker schema), so this count is
# likely chunks, not files - confirm.
print(f"Loaded {len(docs)} documents.")
print(f"Total characters in first document: {len(docs[0].page_content)}")

The plugin langchain_docling will not be loaded because Docling is being executed with allow_external_plugins=false.
The plugin langchain_docling will not be loaded because Docling is being executed with allow_external_plugins=false.
[32m[INFO] 2026-02-08 10:24:14,799 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-08 10:24:14,820 [RapidOCR] download_file.py:60: File exists and is valid: C:\Projects\gen-ai-rag-bot\.venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-08 10:24:14,820 [RapidOCR] main.py:53: Using C:\Projects\gen-ai-rag-bot\.venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_det_infer.onnx[0m
[32m[INFO] 2026-02-08 10:24:14,919 [RapidOCR] base.py:22: Using engine_name: onnxruntime[0m
[32m[INFO] 2026-02-08 10:24:14,929 [RapidOCR] download_file.py:60: File exists and is valid: C:\Projects\gen-ai-rag-bot\.venv\Lib\site-packages\rapidocr\models\ch_ppocr_mobile_v2.0_cls_infer.onnx[0m
[32m[INFO] 2026-02-08

Loaded 31 documents.
Total characters in first document: 278


In [7]:
# Show at most the first 600 characters of the first loaded document.
first_document = docs[0]
print(first_document.page_content[:600])

Garanția produselor - KronMag.ro
La KronMag, ne angajăm să oferim produse de calitate, conforme cu standardele și legislația în vigoare. Toate produsele comercializate beneficiază de garanție legală, iar procesul de soluționare a eventualelor probleme este rapid și transparent.


In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings; sizes are measured in characters.
splitter_options = dict(
    chunk_size=1000,  # maximum characters per chunk
    chunk_overlap=200,  # characters shared between neighbouring chunks
    add_start_index=True,  # record each chunk's offset in its source document
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

all_splits = text_splitter.split_documents(docs)

split_count = len(all_splits)
print(f"Split documents into {split_count} sub-documents.")

Split documents into 31 sub-documents.


In [9]:
# Add every chunk to the vector store and keep the ids it returns.
# NOTE(review): the ids look auto-generated, so re-running this cell presumably
# inserts the same chunks again - confirm before re-executing.
document_ids = vector_store.add_documents(all_splits)

first_ids = document_ids[:3]
print(first_ids)

['4c4b2f00-f3f8-41db-9921-81a2dfcd5ad9', '2ef54fe1-9de4-4444-8961-00fa9f99c43c', '889e07ae-6e41-4f17-af23-e2506a28711a']


In [10]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    # The docstring above is left verbatim: it belongs to the tool definition.
    # Fetch the two chunks most similar to the query.
    matches = vector_store.similarity_search(query, k=2)

    # Render each hit as a "Source / Content" section.
    sections = []
    for match in matches:
        sections.append(f"Source: {match.metadata}\nContent: {match.page_content}")

    # Matches response_format="content_and_artifact": (text content, raw documents).
    return "\n\n".join(sections), matches

In [11]:
from langchain.agents import create_agent


# Tools the agent may call.
tools = [retrieve_context]

# Custom system instructions for the agent.
prompt = (
    "You have access to a tool that retrieves context from a few documents. "
    + "Use the tool to help answer user queries."
)

agent = create_agent(model=model, tools=tools, system_prompt=prompt)

In [13]:
# Question sent to the agent. Alternative to try:
#   "Ce metode de livrare sunt disponibile?"
query = "Care e durata garantiei?"

user_turn = {"role": "user", "content": query}
agent_input = {"messages": [user_turn]}

# With stream_mode="values" each event is read as a state holding a "messages"
# list; print the newest message of every event.
for state in agent.stream(agent_input, stream_mode="values"):
    newest_message = state["messages"][-1]
    newest_message.pretty_print()


Care e durata garantiei?
Tool Calls:
  retrieve_context (4e52bcaf-4d5d-4b11-bf97-5cf7c69032f0)
 Call ID: 4e52bcaf-4d5d-4b11-bf97-5cf7c69032f0
  Args:
    query: durata garantiei
Name: retrieve_context

Source: {'source': '..\\api\\documents\\pdf\\garantia_produselor.pdf', 'dl_meta': {'schema_name': 'docling_core.transforms.chunker.DocMeta', 'version': '1.0.0', 'doc_items': [{'self_ref': '#/texts/5', 'parent': {'$ref': '#/body'}, 'children': [], 'content_layer': 'body', 'label': 'text', 'prov': [{'page_no': 1, 'bbox': {'l': 133.768, 't': 467.099, 'r': 479.141, 'b': 409.316, 'coord_origin': 'BOTTOMLEFT'}, 'charspan': [0, 342]}]}], 'headings': ['2. Durata garanției'], 'origin': {'mimetype': 'application/pdf', 'binary_hash': 11319521422555953865, 'filename': 'garantia_produselor.pdf'}}, 'start_index': 0}
Content: 2. Durata garanției
Durata garanției variază în funcție de natura și categoria produsului. Majoritatea produselor beneficiază de o garanție de minimum 24 de luni, cu excepția con