In [1]:
import requests
import os
import time
from pathlib import Path
import base64
import io
from PIL import Image
import getpass

# nv-ingest imports
from nv_ingest.framework.orchestration.ray.util.pipeline.pipeline_runners import run_pipeline
from nv_ingest_client.client import Ingestor, NvIngestClient
from nv_ingest_api.util.message_brokers.simple_message_broker import SimpleClient

In [2]:
# Make sure an NVIDIA API key is available for the remote calls made later in
# the notebook; prompt for one (without echoing it) only when the environment
# does not already provide it.
if os.environ.get("NVIDIA_API_KEY"):
    print("[INFO] NVIDIA API Key detected in environment.")
else:
    print("[ACTION REQUIRED] Please enter your NVIDIA API Key (starts with 'nvapi-').")
    os.environ["NVIDIA_API_KEY"] = getpass.getpass("Enter NVIDIA API Key: ")

[INFO] NVIDIA API Key detected in environment.


In [3]:
# 1. Select the input PDF
# This run ingests a local document. The commented block below is the original
# sample (World Bank Peru 2017); re-enable it to download and use that instead.
# PDF_URL = "https://documents1.worldbank.org/curated/en/484531533637705178/pdf/129273-WP-PUBLIC-Peru-2017.pdf"
# PDF_PATH = "129273-WP-PUBLIC-Peru-2017.pdf"
# if not Path(PDF_PATH).exists():
#     print(f"[INFO] Downloading {PDF_PATH}...")
#     response = requests.get(PDF_URL)
#     with open(PDF_PATH, "wb") as f:
#         f.write(response.content)
PDF_PATH = "./pdf/ระบบซื้อ.pdf"

# Fail fast: without this check a wrong path is only discovered after the
# pipeline subprocess has been launched and the warmup sleep has elapsed.
if not Path(PDF_PATH).is_file():
    raise FileNotFoundError(f"Input PDF not found: {PDF_PATH}")

# 2. Start nv-ingest in Library Mode
# This spins up the pipeline locally but uses remote NIMs by default for inference.
print("[INFO] Starting Ingestion Pipeline (Library Mode)...")
run_pipeline(
    block=False,
    disable_dynamic_scaling=True,
    run_in_subprocess=True,
    quiet=True
)
# Warmup: fixed wait for the non-blocking pipeline subprocess; nothing in this
# cell polls for readiness, so slow machines may need a longer delay.
time.sleep(15)

# 3. Connect a client to the local pipeline
client = NvIngestClient(
    message_client_allocator=SimpleClient,
    message_client_port=7671, # Default LibMode port
    message_client_hostname="localhost"
)

print("[INFO] Submitting job...")

# 4. Extract Content
ingestor = (
    Ingestor(client=client)
    .files([PDF_PATH])
    .extract(
        extract_text=True,
        extract_tables=True,
        extract_charts=True,  # Triggers remote YOLOX for chart crops
        extract_images=True,
        extract_method="pdfium", # CPU-based text extraction
        table_output_format="markdown"
    )
)

job_results = ingestor.ingest()

# Only one file was submitted, so the first entry holds that document's items.
# Guard the index so an empty result produces a clear error instead of IndexError.
if not job_results:
    raise RuntimeError(f"Ingestion returned no results for {PDF_PATH}")

extracted_data = job_results[0]
print(f"[SUCCESS] Extracted {len(extracted_data)} items from document.")

[INFO] Starting Ingestion Pipeline (Library Mode)...
DEBUG: Direct print to stdout - should appear in parent process


DEBUG: Direct write to stderr - should appear in parent process


[INFO] Submitting job...
[SUCCESS] Extracted 452 items from document.


In [4]:
# Collect every extracted item whose content metadata marks it as a table.
# Metadata is read defensively (missing or None sub-dicts are treated as empty),
# matching how the indexing loop later in the notebook reads the same fields.
table_items = [
    item
    for item in extracted_data
    if ((item.get("metadata") or {}).get("content_metadata") or {}).get("subtype") == "table"
]

if table_items:
    # Inspect the last table found in the document.
    sample = table_items[-1]["metadata"]
    sample_content_meta = sample.get("content_metadata") or {}

    print("="*60)
    print("EXTRACTED TABLE ITEM")
    print("="*60)
    print(f"Type: {sample_content_meta.get('subtype')}")
    print(f"Page: {sample_content_meta.get('page_number', -1)}")

    image_meta = sample.get("image_metadata") or {}
    has_image = bool(image_meta.get("content"))

    print(f"Has Image Content? {has_image}")

    print("\nMarkdown Content (Snippet):")
    table_meta = sample.get("table_metadata") or {}
    # `or ""` also covers the case where the key exists but holds None.
    content = table_meta.get("table_content") or ""
    # Only show an ellipsis when the snippet was actually truncated.
    print(content[:300] + ("..." if len(content) > 300 else ""))
else:
    print("[INFO] No table items were extracted from the document.")

EXTRACTED TABLE ITEM
Type: table
Page: 109
Has Image Content? False

Markdown Content (Snippet):
| Todey Zoom In Toom Out |  |  |  |  |  |  |
| --- | --- | --- | --- | --- | --- | --- |
| 1 sumen 2558 |  |  |  |  |  |  |
|  |  |  |  |  | and |  |
|  |  |  |  | 3 |  |  |
|  | 30 |  |  |  |  |  |
|  |  |  |  | 10 |  |  |
| 13 | 24 |  |  |  |  | 29 |
|  | 21 | 22 | 23 | 24 | 25 |  |...


In [5]:
from transformers import AutoModel, AutoProcessor, AutoConfig
from pymilvus import MilvusClient
import numpy as np
import torch
import io
from PIL import Image
import base64
import transformers

# Configuration
HF_EMBED_MODEL_ID = "nvidia/llama-nemotron-embed-vl-1b-v2"
# NOTE(review): the collection and DB file names still refer to the World Bank
# sample, while the ingestion cell now points PDF_PATH at a different document.
# They are only identifiers, but consider renaming them to avoid confusion.
COLLECTION_NAME = "worldbank_peru_2017"
MILVUS_URI = "milvus_wb_demo.db"

print(f"[INFO] Transformers Version: {transformers.__version__}")
print(f"[INFO] Loading Embedding Model: {HF_EMBED_MODEL_ID}...")

# --- 1. HARDWARE-AWARE CONFIGURATION PATCH ---
# Check GPU capabilities to ensure T4 (Turing) stability and H100 (Hopper) performance.
# This section decides three values consumed by the model load below:
#   model_dtype - tensor precision passed as torch_dtype
#   attn_impl   - attention backend name passed as attn_implementation
#   config      - None, or a patched AutoConfig that refuses Flash Attention 2
use_flash_attn = False
model_dtype = torch.float32 # Default fallback

if torch.cuda.is_available():
    # Only device 0 is inspected.
    major, minor = torch.cuda.get_device_capability(0)
    print(f"[INFO] GPU Compute Capability: {major}.{minor}")

    # Precision: Use bfloat16 for Ampere+ (8.0+), float16 for Turing (7.5) or older
    if major >= 8:
        model_dtype = torch.bfloat16
        print("[INFO] Using bfloat16 precision (Ampere+ detected).")
    else:
        model_dtype = torch.float16
        print("[INFO] Using float16 precision (Turing/Older detected).")

    # Attention: Flash Attention 2 requires Ampere+ (8.0+)
    if major >= 8:
        try:
            # Imported purely as an availability probe; the module object itself
            # is not used anywhere in this cell.
            import flash_attn
            use_flash_attn = True
        except ImportError:
            # Library missing: use_flash_attn stays False and the eager path runs.
            pass

    if use_flash_attn:
        attn_impl = "flash_attention_2"
        print("[INFO] Flash Attention 2 enabled.")
        config = None
    else:
        attn_impl = "eager"
        reason = "Hardware incompatible (needs Ampere+)" if major < 8 else "Library not installed"
        print(f"[WARNING] Flash Attention 2 disabled: {reason}. Forcing 'eager' mode.")

        # --- CONFIG PATCH ---
        # Load the config up front so its nested llm_config (when present) can be
        # swapped to a subclass that ignores attempts to switch back to FA2.
        config = AutoConfig.from_pretrained(HF_EMBED_MODEL_ID, trust_remote_code=True)
        if hasattr(config, "llm_config"):
            original_config_class = config.llm_config.__class__
            class SafeConfig(original_config_class):
                def __setattr__(self, name, value):
                    # Silently drop any assignment of "flash_attention_2" to
                    # _attn_implementation; every other assignment goes through.
                    if name == "_attn_implementation" and value == "flash_attention_2":
                        return
                    super().__setattr__(name, value)
            # Re-class the existing instance in place, then pin eager attention.
            config.llm_config.__class__ = SafeConfig
            config.llm_config._attn_implementation = "eager"
else:
    # No CUDA device: keep the float32 default and eager attention.
    attn_impl = "eager"
    config = None
    print("[WARNING] Running on CPU.")

# --- 2. WORKAROUND FOR TOKENIZER BACKEND BUG ---
# Even with downgraded transformers, sometimes the backend class needs patching
# for specific model configurations.
# If the installed transformers has no TokenizersBackend, the import fails and
# this whole section is skipped.
try:
    from transformers.tokenization_utils_base import TokenizersBackend as OriginalTokenizersBackend

    # Check if patch is needed (if attribute is missing)
    if not hasattr(OriginalTokenizersBackend, "additional_special_tokens"):
        print("[INFO] Patching TokenizersBackend to handle missing 'additional_special_tokens'...")
        class PatchedTokenizersBackend(OriginalTokenizersBackend):
            def __getattr__(self, key):
                # __getattr__ only runs after normal lookup fails, so this supplies
                # an empty list as a fallback for the missing attribute.
                if key == "additional_special_tokens":
                    return []
                # NOTE(review): assumes the parent class defines __getattr__ - confirm.
                return super().__getattr__(key)

        # NOTE(review): this rebinds the module-level name only; code that already
        # holds a reference to the original class is presumably unaffected - verify
        # that the patched class is actually the one the model's tokenizer uses.
        import transformers.tokenization_utils_base
        transformers.tokenization_utils_base.TokenizersBackend = PatchedTokenizersBackend
except ImportError:
    pass

# --- 3. LOAD MODEL ---
print("[INFO] Loading model...")
# trust_remote_code=True runs model code shipped with the Hugging Face repository.
# The result is put in eval mode and bound to the global `embed_model`.
embed_model = AutoModel.from_pretrained(
    HF_EMBED_MODEL_ID,
    config=config,
    torch_dtype=model_dtype,
    trust_remote_code=True,
    attn_implementation=attn_impl,
    device_map="auto" if torch.cuda.is_available() else "cpu"
).eval()

# Configure processor
# NOTE(review): presumably controls image tiling / thumbnail generation in the
# model's custom processor - semantics are defined by the remote code, confirm there.
embed_model.processor.max_input_tiles = 6
embed_model.processor.use_thumbnail = True

def base64_to_pil(b64_str):
    """Decode a base64-encoded image into an RGB PIL image.

    Args:
        b64_str: Base64 text of an encoded image; may be empty or None.

    Returns:
        The decoded image converted to RGB, or None when the input is falsy
        or anything goes wrong while decoding/opening it (best-effort helper:
        failures are deliberately swallowed rather than raised).
    """
    if b64_str:
        try:
            raw_bytes = base64.b64decode(b64_str)
            buffer = io.BytesIO(raw_bytes)
            return Image.open(buffer).convert("RGB")
        except Exception:
            pass
    return None

# --- 4. VECTOR DB SETUP ---
# Open the Milvus database at MILVUS_URI and rebuild the collection from scratch
# so that re-running this cell never mixes vectors from a previous run.
milvus_client = MilvusClient(MILVUS_URI)

if milvus_client.has_collection(collection_name=COLLECTION_NAME):
    milvus_client.drop_collection(collection_name=COLLECTION_NAME)

# dimension must match the length of the vectors inserted below;
# auto_id lets Milvus generate the primary keys.
milvus_client.create_collection(COLLECTION_NAME, dimension=2048, auto_id=True)

# --- 5. INFERENCE LOOP ---
# Embed every extracted item (text, image, or image+text) and collect one row
# per item for insertion into Milvus.
vectors_to_insert = []
print("[INFO] Generating Multimodal Embeddings...")

# Define safe limits based on hardware to prevent OOM on T4
if use_flash_attn:
    LIMIT_TEXT = 8192
    LIMIT_MM = 10240
else:
    print("[INFO] T4/Eager mode active: Reducing embedding context window to 4096.")
    LIMIT_TEXT = 4096
    LIMIT_MM = 4096


def _l2_normalize(x: torch.Tensor, eps: float = 1e-12) -> torch.Tensor:
    """Scale `x` to unit L2 norm along its last dimension (eps avoids division by zero)."""
    return x / (x.norm(p=2, dim=-1, keepdim=True) + eps)


with torch.inference_mode():
    for item in extracted_data:
        # `or {}` / `or ""` treat keys that are present but None like missing keys.
        metadata = item.get("metadata") or {}
        content_meta = metadata.get("content_metadata") or {}

        doc_type = content_meta.get("type") or "text"
        subtype = content_meta.get("subtype") or ""
        content_text = metadata.get("content") or ""

        image_obj = None
        modality = "text"

        # Check for Chart/Table images
        b64_data = None
        if doc_type == "structured" or subtype in ["chart", "table"]:
            image_meta = metadata.get("image_metadata") or {}
            if image_meta.get("content"):
                b64_data = image_meta["content"]
            elif len(content_text) > 100 and " " not in content_text[:50]:
                # Heuristic: long content with no spaces near the start is
                # treated as a base64 image payload stored in `content`.
                b64_data = content_text

            if b64_data:
                image_obj = base64_to_pil(b64_data)

            if image_obj:
                modality = "image"
                table_meta = metadata.get("table_metadata") or {}
                table_content = table_meta.get("table_content")
                if table_content:
                    # Pair the image with its table text for a joint embedding.
                    content_text = table_content
                    modality = "image_text"

        # Nothing to embed for this item.
        if not content_text and not image_obj:
            continue

        # Encode
        try:
            if modality == "image":
                embed_model.processor.p_max_length = 2048
                embeddings = embed_model.encode_documents(images=[image_obj])
            elif modality == "image_text":
                embed_model.processor.p_max_length = LIMIT_MM
                embeddings = embed_model.encode_documents(images=[image_obj], texts=[content_text])
            else:
                embed_model.processor.p_max_length = LIMIT_TEXT
                embeddings = embed_model.encode_documents(texts=[content_text])

            vec = _l2_normalize(embeddings[0]).float().cpu().numpy().tolist()

            vectors_to_insert.append({
                "vector": vec,
                "text": content_text[:60000],
                "source": (metadata.get("source_metadata") or {}).get("source_name", "Unknown"),
                "page": content_meta.get("page_number", -1),
                "type": subtype or doc_type,
                "has_image": bool(image_obj),
                "image_b64": b64_data if image_obj else ""
            })

            # Periodic cleanup for T4 fragmentation
            if len(vectors_to_insert) % 10 == 0:
                torch.cuda.empty_cache()

        except RuntimeError as e:
            # Skip items that do not fit in GPU memory; anything else is a real error.
            if "out of memory" in str(e):
                print(f"[WARN] Skipped embedding item due to OOM (Length: {len(content_text)}). Clearing cache.")
                torch.cuda.empty_cache()
            else:
                raise  # bare raise keeps the original traceback intact

milvus_client.insert(collection_name=COLLECTION_NAME, data=vectors_to_insert)
print(f"[SUCCESS] Indexed {len(vectors_to_insert)} items.")


[INFO] Transformers Version: 4.46.3
[INFO] Loading Embedding Model: nvidia/llama-nemotron-embed-vl-1b-v2...
[INFO] GPU Compute Capability: 8.9
[INFO] Using bfloat16 precision (Ampere+ detected).
[INFO] Flash Attention 2 enabled.
[INFO] Loading model...


Some kwargs in processor config are unused and will not have any effect: num_image_token, max_input_tiles, pad_to_multiple_of, dynamic_image_size, p_max_length, use_thumbnail, passage_prefix, image_size, norm_type, template, query_prefix, num_channels, padding, system_message, q_max_length. 


[INFO] Generating Multimodal Embeddings...
[SUCCESS] Indexed 452 items.


In [6]:
import torch
import gc
from transformers import AutoModelForSequenceClassification, AutoProcessor, AutoConfig, AutoModel

# Configuration
# Hugging Face repository ids for the two models used by the retrieval pipeline.
# HF_EMBED_MODEL_ID repeats the value already assigned in the indexing cell;
# keep the two in sync so queries are embedded with the model that built the index.
HF_EMBED_MODEL_ID = "nvidia/llama-nemotron-embed-vl-1b-v2"
HF_RERANK_MODEL_ID = "nvidia/llama-nemotron-rerank-vl-1b-v2"

# --- SMART HARDWARE DETECTION ---
def get_hardware_strategy():
    """Choose precision, attention backend, tiling and model-residency policy.

    The decision uses the compute capability and total memory of CUDA device 0
    together with whether the `flash_attn` package can be imported.

    Returns:
        dict with keys:
            "dtype": torch dtype to load models with.
            "attn": "flash_attention_2" or "eager".
            "tiles": maximum image tiles (1 or 6).
            "swap_models": True when models should be unloaded between stages.
        Without CUDA the conservative defaults are returned unchanged
        (float32, eager, 1 tile, swapping enabled).
    """
    strategy = {
        "dtype": torch.float32,
        "attn": "eager",
        "tiles": 1,
        "swap_models": True,  # Default to safe mode (swap)
    }

    if not torch.cuda.is_available():
        return strategy

    props = torch.cuda.get_device_properties(0)
    major, minor = props.major, props.minor
    vram_gb = props.total_memory / (1024**3)

    print(f"[HARDWARE] Detected: {props.name} | VRAM: {vram_gb:.1f}GB | Compute: {major}.{minor}")

    # Probe for the Flash Attention library; only its importability matters.
    try:
        import flash_attn
    except ImportError:
        has_flash_lib = False
    else:
        has_flash_lib = True

    # 1. Architecture strategy
    if major < 8:
        # Pre-Ampere (e.g. Turing / T4): half precision, eager, single tile.
        strategy.update(dtype=torch.float16, attn="eager", tiles=1)
        print("[CONFIG] Mode: Low VRAM (T4 Optimization)")
    elif has_flash_lib:
        strategy.update(dtype=torch.bfloat16, attn="flash_attention_2", tiles=6)
        print("[CONFIG] Mode: High Performance (Flash Attention 2)")
    else:
        strategy.update(dtype=torch.bfloat16, attn="eager", tiles=6)
        print("[CONFIG] Mode: Fallback (Eager Attention on High-End GPU)")

    # 2. Memory strategy: keep both models resident only above 24 GB of VRAM.
    strategy["swap_models"] = not (vram_gb > 24.0)
    if strategy["swap_models"]:
        print("[STRATEGY] Low VRAM detected. Enabling Aggressive Model Swapping.")
    else:
        print("[STRATEGY] High VRAM detected. Keeping models resident in memory.")

    return strategy

# Resolve the hardware strategy once; the loaders and retrieve_and_rerank read it.
config_strat = get_hardware_strategy()

# Lazily-populated model slots. They start empty and are filled on first use.
# Note that rebinding embed_model here drops the reference to the embedding
# model loaded by the indexing cell, so the first query loads it again.
embed_model = rerank_model = rerank_processor = None

def apply_safe_config(config_obj):
    """Pin a model config to eager attention when the strategy requires it.

    When `config_strat["attn"]` is "eager" and the config carries an
    `llm_config`, that nested object is re-classed (in place) to a subclass
    whose `__setattr__` ignores any attempt to set `_attn_implementation`
    to "flash_attention_2", and its attention implementation is set to
    "eager". In every other case the config is left untouched.

    Args:
        config_obj: Config object, typically from `AutoConfig.from_pretrained`.

    Returns:
        The same `config_obj` instance that was passed in.
    """
    eager_requested = config_strat["attn"] == "eager"
    if not eager_requested or not hasattr(config_obj, "llm_config"):
        return config_obj

    llm_cfg = config_obj.llm_config
    base_cls = llm_cfg.__class__

    class SafeConfig(base_cls):
        """Same as the wrapped config class, minus the ability to enable FA2."""

        def __setattr__(self, name, value):
            enabling_fa2 = name == "_attn_implementation" and value == "flash_attention_2"
            if not enabling_fa2:
                super().__setattr__(name, value)

    # Swap the class of the live instance, then force eager attention.
    llm_cfg.__class__ = SafeConfig
    llm_cfg._attn_implementation = "eager"
    return config_obj

def load_embed_model():
    """Load the embedding model into the global `embed_model` if it is not loaded yet.

    The config is fetched first and passed through `apply_safe_config`, then the
    model is loaded with the dtype and attention backend from `config_strat` and
    switched to eval mode. Calling this when a model is already present is a no-op.
    """
    global embed_model
    if embed_model is None:
        print("[LOAD] Loading Embedding Model...")
        embed_cfg = apply_safe_config(
            AutoConfig.from_pretrained(HF_EMBED_MODEL_ID, trust_remote_code=True)
        )
        load_kwargs = dict(
            config=embed_cfg,
            torch_dtype=config_strat["dtype"],
            trust_remote_code=True,
            attn_implementation=config_strat["attn"],
            device_map="auto",
        )
        embed_model = AutoModel.from_pretrained(HF_EMBED_MODEL_ID, **load_kwargs).eval()

def load_rerank_model():
    """Ensure the reranker model and its processor are both loaded.

    Populates the globals `rerank_model` and `rerank_processor`. Each one is
    loaded only if it is currently None, so a call that failed half-way (model
    loaded, processor download failed) is completed by the next call instead of
    returning early with a missing processor. When both are present this is a
    no-op.

    The model is loaded with the dtype and attention backend from
    `config_strat` (after `apply_safe_config`) and put in eval mode; the
    processor receives `max_input_tiles` from the strategy.
    """
    global rerank_model, rerank_processor
    if rerank_model is not None and rerank_processor is not None:
        return

    print(f"[LOAD] Loading Reranker (Tiles={config_strat['tiles']})...")

    if rerank_model is None:
        cfg = AutoConfig.from_pretrained(HF_RERANK_MODEL_ID, trust_remote_code=True)
        cfg = apply_safe_config(cfg)

        rerank_model = AutoModelForSequenceClassification.from_pretrained(
            HF_RERANK_MODEL_ID,
            config=cfg,
            torch_dtype=config_strat["dtype"],
            trust_remote_code=True,
            attn_implementation=config_strat["attn"],
            device_map="auto"
        ).eval()

    if rerank_processor is None:
        rerank_processor = AutoProcessor.from_pretrained(
            HF_RERANK_MODEL_ID,
            trust_remote_code=True,
            max_input_tiles=config_strat["tiles"],
            use_thumbnail=True
        )

def unload_model(model_ref):
    """Drop this function's reference to a model and reclaim unused memory.

    Intended usage is ``model = unload_model(model)``: the function returns
    None so the caller's name is rebound in the same statement. Note that
    ``del`` here removes only the local reference; the object itself can be
    freed only once the caller's reference is gone too, i.e. after the
    assignment of the return value has happened.

    Args:
        model_ref: The object to release, or None (in which case nothing is done).

    Returns:
        Always None.
    """
    if model_ref is None:
        return None
    del model_ref
    # Collect first so that anything the garbage collector frees is already
    # back in the allocator's cache when the cache is emptied.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return None

def retrieve_and_rerank(query, retrieve_k=20, final_k=5):
    """Run dense retrieval against Milvus, then rerank the candidates.

    Stage 1 embeds the query with the embedding model and searches the
    collection. Stage 2 scores every candidate with the reranker (using the
    stored image when the hit has one) and sorts by that score. When
    `config_strat["swap_models"]` is set, each model is released as soon as
    its stage is finished.

    Args:
        query: The user question.
        retrieve_k: Number of candidates requested from Milvus.
        final_k: Number of top reranked hits to return.

    Returns:
        A tuple ``(hits, top_hits)``: `hits` are the Milvus results in
        retrieval order, `top_hits` the best `final_k` by reranker score
        (highest first). Every hit gains a ``'score'`` key holding its
        reranker logit. Returns ``([], [])`` when the search finds nothing.
    """
    global embed_model, rerank_model, rerank_processor

    def _release_memory():
        # Must be called AFTER the global reference has been cleared, otherwise
        # the model is still alive and nothing can be reclaimed.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    swap_models = config_strat["swap_models"]

    # ---------------------------------------------------------
    # STAGE 1: DENSE RETRIEVAL
    # ---------------------------------------------------------
    load_embed_model()

    with torch.no_grad():
        q_emb = embed_model.encode_queries([query])[0].float().cpu().numpy().tolist()

    # If low VRAM, unload embedding model immediately
    if swap_models:
        embed_model = None
        _release_memory()

    hits = milvus_client.search(
        collection_name=COLLECTION_NAME,
        data=[q_emb],
        limit=retrieve_k,
        output_fields=["text", "page", "source", "type", "has_image", "image_b64"]
    )[0]

    if not hits:
        return [], []

    # ---------------------------------------------------------
    # STAGE 2: RERANKING
    # ---------------------------------------------------------
    load_rerank_model()

    # Shorter documents and single-item batches when running in low-memory mode.
    txt_limit = 1000 if swap_models else 8000
    batch_size = 1 if swap_models else 4

    rerank_inputs = []
    for hit in hits:
        ent = hit['entity']
        img_obj = base64_to_pil(ent.get("image_b64")) if ent.get("has_image") else None
        rerank_inputs.append({
            "question": query,
            "doc_text": (ent.get("text") or "")[:txt_limit],
            "doc_image": img_obj if img_obj else ""
        })

    # Send inputs to wherever the reranker actually lives instead of assuming
    # "cuda", so the CPU fallback of the hardware strategy works as well.
    device = next(rerank_model.parameters()).device

    all_scores = []
    for i in range(0, len(rerank_inputs), batch_size):
        batch = rerank_inputs[i:i+batch_size]
        inputs = rerank_processor.process_queries_documents_crossencoder(batch)
        inputs = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}

        with torch.no_grad():
            outputs = rerank_model(**inputs)
            logits = outputs.logits.squeeze(-1).float().cpu().numpy()
            # A single-item batch can collapse to a 0-d array.
            if logits.ndim == 0:
                all_scores.append(float(logits))
            else:
                all_scores.extend(logits.tolist())

        del inputs, outputs
        if swap_models and i % 5 == 0:
            torch.cuda.empty_cache()

    # If low VRAM, unload reranker
    if swap_models:
        rerank_model = None
        rerank_processor = None
        _release_memory()

    # ---------------------------------------------------------
    # SORT & RETURN
    # ---------------------------------------------------------
    for hit, score in zip(hits, all_scores):
        hit['score'] = score

    # sorted() leaves `hits` in retrieval order and returns a reranked copy.
    reranked = sorted(hits, key=lambda x: x['score'], reverse=True)
    return hits, reranked[:final_k]

# Confirmation banner: reports the attention backend chosen by get_hardware_strategy().
print(f"[INFO] Universal Retrieval Pipeline Ready (Strategy: {config_strat['attn']}).")

[HARDWARE] Detected: NVIDIA L40S | VRAM: 44.4GB | Compute: 8.9
[CONFIG] Mode: High Performance (Flash Attention 2)
[STRATEGY] High VRAM detected. Keeping models resident in memory.
[INFO] Universal Retrieval Pipeline Ready (Strategy: flash_attention_2).


In [7]:
from openai import OpenAI
import re
from IPython.display import display, Markdown

def generate_enterprise_answer(query, hits):
    """Ask the hosted LLM to answer `query` using only the retrieved hits.

    Each hit is rendered as a labelled context chunk (1-based source id,
    upper-cased type, page number, then the text) and sent together with a
    system prompt that requests a <think> block, citations and an
    'Answer:'-prefixed final response.

    Args:
        query: The user question.
        hits: Iterable of search hits; each must have an 'entity' mapping with
            'type', 'page' and 'text' keys.

    Returns:
        The raw message text from the first completion choice. An empty string
        is returned when the API reports no content, so callers always get a str.

    Raises:
        KeyError: If the NVIDIA_API_KEY environment variable is not set.
    """
    # Build the context in one pass; join avoids repeated string concatenation.
    chunks = []
    for i, hit in enumerate(hits):
        ent = hit['entity']
        type_label = ent['type'].upper()
        chunks.append(
            f"[Source ID: {i+1} | Type: {type_label} | Page: {ent['page']}]\n{ent['text']}\n\n"
        )
    context_str = "".join(chunks)

    system_prompt = (
        "You are an expert Document Intelligence Analyst. Your goal is to extract precise insights from complex multimodal documents (text, charts, and tables).\n"
        "Answer based ONLY on the provided context chunks.\n\n"
        "GUIDELINES:\n"
        "1. **Start with a <think> block**: Before answering, explicitly reason through the following checks:\n"
        "   - **Visual Legend Check**: If analyzing a chart, identify the legend or color coding first. Ensure you are extracting data for the specific entity requested (e.g., the specific country/company vs. the benchmark/competitor).\n"
        "   - **Table Logic**: If extracting data from a table, look for 'Total', 'All', or 'Average' rows/columns before defaulting to granular sub-categories (like 'Small/Medium/Large').\n"
        "   - **Numerical Precision**: Be vigilant for decimal points and units (e.g., '0.5' vs '5', 'millions' vs 'billions'). If a number seems like an OCR error, verify it against surrounding text.\n"
        "   - **Temporal Filtering**: Ensure the data corresponds to the specific time period requested in the user's query.\n"
        "2. **Citation**: Every factual statement must cite its source exactly as provided (e.g., '[Source ID: 1, Page: 5]').\n"
        "3. **Honesty**: If the exact data point is not in the retrieval, state 'The provided context does not contain this specific data' rather than hallucinating.\n"
        "4. **Format**: After your <think> analysis, provide the final response starting with 'Answer:'."
    )

    client_llm = OpenAI(
        base_url="https://integrate.api.nvidia.com/v1",
        api_key=os.environ["NVIDIA_API_KEY"]
    )

    response = client_llm.chat.completions.create(
        model="nvidia/llama-3.3-nemotron-super-49b-v1.5",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Context:\n{context_str}\n\nQuestion: {query}"}
        ],
        temperature=0.1
    )
    # The message content is optional in the API response; normalise None to ""
    # so the display step never receives a non-string.
    return response.choices[0].message.content or ""

def display_formatted_response(question, raw_output):
    """Render a question/answer pair as Markdown with collapsible reasoning.

    The text between the first <think>...</think> pair is shown inside a
    collapsed <details> section; whatever follows the last </think> is shown
    as the answer. Without a complete <think> block the whole output is
    treated as the answer and a placeholder is shown as the reasoning.

    Args:
        question: The question to show under the "Question" heading.
        raw_output: Raw LLM response text; None is treated as an empty string.

    Returns:
        None. The Markdown is rendered via IPython's `display`.
    """
    # '$' is replaced so Markdown does not interpret it as a math delimiter.
    safe_text = (raw_output or "").replace('$', '&dollar;')
    thought_match = re.search(r'<think>(.*?)</think>', safe_text, re.DOTALL)

    if thought_match:
        reasoning = thought_match.group(1).strip()
        answer = safe_text.split('</think>')[-1].strip()
    else:
        reasoning = "No internal reasoning trace provided."
        answer = safe_text

    # The prompt asks the model to start with 'Answer:'; drop that prefix so it
    # is not repeated right below the "### Answer" heading.
    answer = re.sub(r'^\s*Answer:\s*', '', answer, count=1)

    output_md = f"""
### Question
{question}

<details>
<summary><strong>View Reasoning Process</strong></summary>
<br>
<div style="padding-left: 15px; border-left: 3px solid #ddd; color: #555;">

{reasoning}

</div>
</details>

### Answer
{answer}
<hr style="margin-top: 20px; border: 0; border-top: 1px solid #eee;">
"""
    display(Markdown(output_md))

In [8]:
# Demo questions (Thai) about the procurement-system manual that was ingested.
questions = [
    # 1. How do I cancel a vendor detail document?
    "การยกเลิกเอกสารรายละเอียดผู้ขายต้องทำยังไงบ้าง",

    # 2. How do I create a purchase order detail document?
    "การสร้างเอกสารรายละเอียดใบสั่งซื้อต้องทำยังไงบ้าง",

    # 3. How do I cancel purchase requisition data?
    "วิธียกเลิกข้อมูลใบขอจัดซื้อต้องทำยังไงบ้าง",

    # 4. How do I cancel a purchase order's status?
    "การยกเลิกสถานะใบสั่งซื้อต้องทำยังไงบ้าง"
]

banner_rule = "="*60
print(banner_rule)
print("Enterprise Intelligent Document Processing: LIVE DEMO")
print(banner_rule)

for q in questions:
    # Retrieve 50 candidates and keep the 20 best after reranking; the raw
    # retrieval list (first return value) is not needed for generation.
    _, hits = retrieve_and_rerank(q, retrieve_k=50, final_k=20)

    if not hits:
        print(f"[No relevant data found for query: {q}]")
        continue

    # Generate the raw LLM text, then render it with collapsible reasoning.
    raw_answer = generate_enterprise_answer(q, hits)
    display_formatted_response(q, raw_answer)

Enterprise Intelligent Document Processing: LIVE DEMO
[LOAD] Loading Embedding Model...


Some kwargs in processor config are unused and will not have any effect: num_image_token, max_input_tiles, pad_to_multiple_of, dynamic_image_size, p_max_length, use_thumbnail, passage_prefix, image_size, norm_type, template, query_prefix, num_channels, padding, system_message, q_max_length. 
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[LOAD] Loading Reranker (Tiles=6)...


config.json: 0.00B [00:00, ?B/s]

configuration_llama_nemotron_vl.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nvidia/llama-nemotron-rerank-vl-1b-v2:
- configuration_llama_nemotron_vl.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_llama_nemotron_vl.py: 0.00B [00:00, ?B/s]

processing_llama_nemotron_vl.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nvidia/llama-nemotron-rerank-vl-1b-v2:
- processing_llama_nemotron_vl.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/nvidia/llama-nemotron-rerank-vl-1b-v2:
- modeling_llama_nemotron_vl.py
- processing_llama_nemotron_vl.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/3.36G [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Some kwargs in processor config are unused and will not have any effect: per_tile_len, num_image_token, max_input_tiles, pad_to_multiple_of, dynamic_image_size, rerank_max_length, use_thumbnail, template, padding, prompt_template. 
Some kwargs in processor config are unused and will not have any effect: per_tile_len, num_image_token, max_input_tiles, pad_to_multiple_of, dynamic_image_size, rerank_max_length, use_thumbnail, template, padding, prompt_template. 



### Question
การยกเลิกเอกสารรายละเอียดผู้ขายต้องทำยังไงบ้าง

<details>
<summary><strong>View Reasoning Process</strong></summary>
<br>
<div style="padding-left: 15px; border-left: 3px solid #ddd; color: #555;">

Okay, let's tackle this question about how to cancel a supplier document in the Q-ERP system. The user is asking for the steps to cancel a supplier document. I need to refer to the provided context chunks to find the exact procedure.

First, I'll look through the context for sections related to canceling supplier documents. From the context, there's a section under "โปรแกรมผู้ขาย" (Supplier Program) that talks about canceling documents. 

Looking at Source ID: 1, Page: 27, there's a part that says: "3. การยกเลิกเอกสารรายละเอียดผู้ขาย 3.1 ในกรณีทีÉผู้ใช้ต้องการยกเลิกสถานะใบสัÉงซืÊอบางรายการ สามารถทําได้โดยเลือกข้อมูลของผู้ขาย รายทีÉต้องการยกเลิกสถานะ แล้วกดปุ ่ ม หรือเลือกคลิกทีÉแถบเมนูทีÉเมนูสถานะข้อมูล ยกเลิก ดังรูปทีÉ PO-44 เมืÉอผู้ใช้เลือก สถานะยกเลิก ให้แก่ใบสัÉงซืÊอแล้ว สีทีÉแสดงข้อมูลของ ใบสัÉงซืÊอจะเปลีÉยนเป็นสีแดง"

Wait, but this seems to be about canceling a purchase order (ใบสัÉงซืÊอ), not the supplier document itself. Let me check again.

Further down in the same source, under "2. การแก้ไขเอกสารรายละเอียดผู้ขาย", there's a mention of canceling. Wait, no. Let me check the section specifically about canceling supplier documents.

Looking back, in the "โปรแกรมผู้ขาย" section, under "2.1.5 เครืÉองมือสําหรับสถานะ : ยกเลิก (Status : Cancel Tool) ใช้สําหรับยกเลิกสถานะของผู้ขาย เพืÉอยกเลิกรายการนัÊน ปรากฏดังรูปทีÉ 12" (Source ID: 1, Page: 12). But the steps for cancellation might be in the later sections.

In the same source, under "3. การยกเลิกเอกสารรายละเอียดผู้ขาย" (Section 3: Cancellation of Supplier Document Details), the steps are:

3.1 Select the supplier data to be canceled from the list.
3.2 Click the tool button or select the status menu → Cancel.
3.3 The system will confirm, and the status of the supplier will change to canceled, displayed in red.

So the steps are: select the supplier record, use the cancel tool or status menu to cancel, confirm, and the status changes to red.

I need to make sure there are no other steps mentioned. The context mentions that after cancellation, the status is shown in red, and the user can still view the data but cannot edit or use it again.

Therefore, the answer should outline these steps based on the provided context.

</div>
</details>

### Answer
Answer: การยกเลิกเอกสารรายละเอียดผู้ขายในระบบ Q-ERP สามารถทำได้ตามขั้นตอนดังนี้ [Source ID: 1, Page: 27]:  
1. เลือกข้อมูลผู้ขายที่ต้องการยกเลิกสถานะจากตารางข้อมูล  
2. คลิกปุ่ม **สถานะ → ยกเลิก** (Status: Cancel Tool) บนส่วนกล่องเครื่องมือ หรือเลือกเมนูสถานะข้อมูล → ยกเลิก  
3. ระบบจะขอการยืนยันการยกเลิก  
4. หลังยืนยัน สีที่แสดงข้อมูลผู้ขายในตารางจะเปลี่ยนเป็นสีแดง หมายถึงสถานะถูกยกเลิก  

**หมายเหตุ:** ข้อมูลผู้ขายที่ถูกยกเลิกยังสามารถดูรายละเอียดได้ แต่ไม่สามารถแก้ไขหรือนำไปใช้งานในระบบได้อีกต่อไป [Source ID: 1, Page: 27].
<hr style="margin-top: 20px; border: 0; border-top: 1px solid #eee;">



### Question
การสร้างเอกสารรายละเอียดใบสั่งซื้อต้องทำยังไงบ้าง

<details>
<summary><strong>View Reasoning Process</strong></summary>
<br>
<div style="padding-left: 15px; border-left: 3px solid #ddd; color: #555;">

Okay, let's tackle this question about creating a purchase order document in the Q-ERP system. The user is asking for the steps to create a purchase order. I need to refer to the provided context chunks to find the relevant information.

First, I'll look through the context for sections related to creating a purchase order. From the context, there's a section titled "การสร้างเอกสารรายละเอียดใบสัÉงซืÊอ" which translates to creating a purchase order document. 

Looking at the steps mentioned in the context, the process starts by accessing the Purchase menu and selecting the Purchase Order document form. Then, the user needs to click the 'New' tool in the toolbar to start creating a new document. The system will display a form with various fields that need to be filled in. Some fields are mandatory, marked with an asterisk (*), so those must be completed. 

The user might need to select data from existing records, like customer or product information, using the search tools provided. For example, clicking on certain fields might open a search window to select a customer or product from the database. After filling in all necessary details, the user saves the document by clicking the 'Save' button. There might be a confirmation prompt to ensure the user wants to proceed with saving.

I should also check if there are any specific details mentioned, like required fields or additional steps for different types of purchases (domestic vs. international). The context mentions that the system handles both domestic and international purchases, so the user might need to specify that. 

Additionally, there's a mention of a 'Print Preview' option, so after saving, the user might have the option to print the document. However, the question is specifically about creating the document, so printing might be an optional step.

I need to make sure the steps are in order and reference the correct source IDs where the information is found. The context chunks from Source ID: 1 (the main text) and possibly others like Source ID: 6 or 10 which have tables related to purchase orders might be relevant. 

Wait, looking at the context, the detailed steps for creating a purchase order are in the section under "โปรแกรมเอกสารใบสั่งซื้อ" (Purchase Order Document Program). The steps are outlined from 1.1 to 1.4, including accessing the menu, using the toolbar, filling in the form, and saving. There's also mention of using search tools for selecting data, which is part of the process.

I should structure the answer by listing the steps in order, citing the source where each step is described. Also, note any mandatory fields and the use of search functions. Since the user might be looking for a concise step-by-step guide, I'll present it clearly, ensuring each step references the correct part of the context.

</div>
</details>

### Answer
Answer: การสร้างเอกสารรายละเอียดใบสั่งซื้อ (Purchase Order) ในระบบ Q-ERP มีขั้นตอนดังนี้ [Source ID: 1, Page: 67-74]:  

1. **เข้าเมนู ระบบซื้อ (Purchase)**  
   - คลิกเมนู **ใบสั่งซื้อ (Purchase Order)** ในหน้าจอ Q-ERP Menu  

2. **ใช้เครื่องมือสร้างเอกสารใหม่**  
   - คลิกปุ่ม **สร้าง (New)** ในกล่องเครื่องมือ เพื่อเริ่มสร้างเอกสารใบสั่งซื้อ  

3. **กรอกข้อมูลหลัก**  
   - กรอกข้อมูลที่มีเครื่องหมาย * (จำเป็นต้องกรอก) เช่น  
     - **เลขที่เอกสาร (Document Number)**  
     - **วันที่สั่งซื้อ (Order Date)**  
     - **สกุลเงิน (Currency)**  
     - **รหัสผู้ขาย (Supplier Code)**  
     - **ชื่อผู้ขาย (Supplier Name)**  
   - ใช้ปุ่มค้นหา (ปุ่มที่มีไอคอนลูกศรหรือเครื่องหมาย...) เพื่อเลือกข้อมูลจากฐานข้อมูล (เช่น ผู้ขาย, สินค้า)  

4. **เพิ่มรายการสินค้า/บริการ**  
   - ในส่วน **รายการ (Items)**:  
     - คลิกที่ช่องว่างในตารางเพื่อเพิ่มรายการ  
     - ค้นหาสินค้าโดยคลิกปุ่มค้นหา (ไอคอนลูกศร) ที่ช่อง **รหัสสินค้า (Item Code)**  
     - กรอก **จำนวน (Quantity)**, **หน่วย (Unit)**, **ราคา/หน่วย (Unit Price)**  
     - ระบบจะคำนวณ **รวมเงิน (Total)** อัตโนมัติ  

5. **บันทึกเอกสาร**  
   - คลิกปุ่ม **บันทึก (Save)** ในกล่องเครื่องมือ  
   - ระบบจะขอยืนยันการบันทึก -> คลิก **ใช่ (Yes)**  

6. **พิมพ์เอกสาร (ถ้าต้องการ)**  
   - คลิกปุ่ม **พิมพ์ (Print)** หรือ **ดูก่อนพิมพ์ (Print Preview)** ในกล่องเครื่องมือ  

**หมายเหตุ**:  
- ข้อมูลที่มีเครื่องหมาย * ต้องกรอกให้ครบ  
- สามารถแก้ไขข้อมูลได้โดยการคลิกปุ่ม **แก้ไข (Edit)** หลังบันทึกครั้งแรก [Source ID: 1, Page: 74]  

[Source ID: 1, Page: 67-74]
<hr style="margin-top: 20px; border: 0; border-top: 1px solid #eee;">



### Question
วิธียกเลิกข้อมูลใบขอจัดซื้อต้องทำยังไงบ้าง

<details>
<summary><strong>View Reasoning Process</strong></summary>
<br>
<div style="padding-left: 15px; border-left: 3px solid #ddd; color: #555;">

Okay, let's tackle the user's question: "วิธียกเลิกข้อมูลใบขอจัดซื้อต้องทำยังไงบ้าง" which translates to "How to cancel purchase request data?" 

First, I need to refer to the provided context chunks to find the relevant information. The user is asking about the process to cancel a purchase request in the Q-ERP system.

Looking through the context, I remember that there's a section about the Purchase Requisition program. Let me check the sources. 

From Source ID: 1, there's a section titled "โปรแกรมเอกสารใบขอซื้อ" (Purchase Requisition Program). Specifically, under the part about canceling data, there's a step-by-step guide. 

In the context, under the section for canceling purchase request data, the steps are outlined. The user needs to select the data they want to cancel, then click the cancel status button in the tool menu. Then confirm the cancellation. 

Also, there's a mention that canceled data will be displayed in red and can still be viewed but not edited or used again. 

I should make sure there are no other steps or details in other sources. Checking other tables or text chunks, but it seems the main info is in Source ID: 1. 

So the answer should outline the steps: selecting the data, using the cancel tool, confirming, and noting the red text indication. Also, mention that it can't be edited after cancellation.

</div>
</details>

### Answer
Answer: การยกเลิกข้อมูลใบขอจัดซื้อ (Purchase Requisition) ในระบบ Q-ERP ทำได้โดยเลือกข้อมูลใบขอจัดซื้อที่ต้องการยกเลิกจากตารางแสดงรายการ แล้วคลิกปุ่ม **"ยกเลิกสถานะ"** ในกล่องเครื่องมือ (หรือเลือกจากแถบเมนู **"สถานะข้อมูล → ยกเลิก"**) หลังจากนั้น ระบบจะขอการยืนยันการยกเลิก โดยคลิก **"YES"** เพื่อเสร็จสิ้นการยกเลิก [Source ID: 1, Page: 51]. 

**หมายเหตุ:** ข้อมูลที่ถูกยกเลิกแล้วจะแสดงสถานะเป็นตัวอักษรสีแดงในตาราง และยังสามารถดูรายละเอียดได้ แต่ไม่สามารถแก้ไขหรือนำข้อมูลนั้นมาใช้งานได้อีก [Source ID: 1, Page: 51].
<hr style="margin-top: 20px; border: 0; border-top: 1px solid #eee;">



### Question
การยกเลิกสถานะใบสั่งซื้อต้องทำยังไงบ้าง

<details>
<summary><strong>View Reasoning Process</strong></summary>
<br>
<div style="padding-left: 15px; border-left: 3px solid #ddd; color: #555;">

Okay, let's tackle this question about how to cancel the status of a purchase order in the Q-ERP system. The user is asking for the steps involved in this process.

First, I need to recall the information provided in the context. From the documents, especially the sections related to the Purchase Order (ใบสั่งซื้อ) program, there's a mention of a "Status: Cancel Tool" in the toolbar. The context also describes that selecting a purchase order and then clicking on the status cancel option will change the status to canceled, and the text color turns red to indicate cancellation.

Looking at the specific context chunks, in Source ID: 1, under the section about the Purchase Order program, there's a part that says: "หากสังเกตให้ดี เมื่อผู้ใช้เลือก สถานะยกเลิก ให้แก่ใบสั่งซื้อแล้ว สีที่แสดงข้อมูลของใบสั่งซื้อจะเปลี่ยนเป็นสีแดง" which translates to the status changing to red when canceled. Also, in the same section, there's a step-by-step guide on how to cancel, mentioning selecting the order and using the status cancel tool.

Additionally, in Source ID: 6 (Page: 58), there's a table showing some data with a column labeled "Un" which might indicate the status, but it's not entirely clear. However, the main instructions are in the text sections.

So, putting it all together, the steps would involve:

1. Accessing the Purchase Order module.
2. Selecting the specific purchase order to cancel.
3. Using the "Status: Cancel Tool" from the toolbar or menu to change the status.
4. Confirming the cancellation, which changes the order's status and displays it in red.

I need to make sure there are no other steps mentioned, like needing to enter a reason for cancellation or any approvals required. The context doesn't mention any additional steps beyond selecting the order and using the cancel tool. Also, the cancellation seems to be a straightforward action without requiring further input beyond confirmation.

Another thing to check is if there are any prerequisites, like the order not being partially fulfilled or having related invoices. The context doesn't specify such conditions, so it's assumed that the user can cancel any order as long as it's in a cancellable state.

Therefore, the answer should outline the steps based on the provided context, citing the relevant source IDs where the information was found.

</div>
</details>

### Answer
Answer: การยกเลิกสถานะใบสั่งซื้อ (Purchase Order) ในระบบ Q-ERP สามารถทำได้ตามขั้นตอนดังนี้:

1. **เลือกใบสั่งซื้อที่ต้องการยกเลิก**  
   - คลิกเลือกข้อมูลใบสั่งซื้อที่ต้องการยกเลิกในตารางแสดงรายการ (ดังรูปที่ PO-44)

2. **ใช้เครื่องมือยกเลิกสถานะ**  
   - คลิกปุ่ม **"สถานะ: ยกเลิก"** (Status: Cancel Tool) บนส่วนกล่องเครื่องมือ หรือเลือกจากแถบเมนู **"สถานะข้อมูล → ยกเลิก"**  
   - ระบบจะแสดงหน้าต่างยืนยันการยกเลิก

3. **ยืนยันการยกเลิก**  
   - คลิกปุ่ม **"YES"** เพื่อยืนยันการยกเลิกสถานะใบสั่งซื้อ  
   - หลังการยกเลิก สีที่แสดงข้อมูลใบสั่งซื้อจะเปลี่ยนเป็นสีแดง (ดังรูปที่ PO-45)

**แหล่งข้อมูลอ้างอิง:**  
- [Source ID: 1, Page: 75] (ส่วนการยกเลิกสถานะใบสั่งซื้อ)  
- [Source ID: 1, Page: 56] (ส่วนกล่องเครื่องมือของโปรแกรมใบสั่งซื้อ)  

**หมายเหตุ:**  
- การยกเลิกสถานะใบสั่งซื้อจะไม่เปลี่ยนแปลงข้อมูลรายละเอียด แต่จะแสดงสถานะว่าเป็น "ยกเลิก" และสีแดงในตาราง  
- ผู้ใช้สามารถดูรายละเอียดของใบสั่งซื้อที่ถูกยกเลิกได้ แต่ไม่สามารถแก้ไขหรือนำข้อมูลไปใช้ในการทำงานต่อได้
<hr style="margin-top: 20px; border: 0; border-top: 1px solid #eee;">
