
# Hybrid RAG – Update & Q&A (Notebook)
This notebook has two main parts:
1. **Update/Build index** from your local `data/` folder using functions from `app.py`.
2. **Ask questions** over the indexed content with optional filters.

> Expected layout (same as your VS Code script):
- `data/structured` → CSV/XLSX tables  
- `data/unstructured` → PDF/TXT/MD/DOCX  


In [13]:

# === Cell 1: Build / Update index ===
# Purpose: make `app.py` importable, build (or update) the index through its
# helper, then run the two sanity checks that `app.py` exposes.
# Make sure this notebook sits next to `app.py`, or adjust the path below.
import sys, os
from pathlib import Path
# ipywidgets is required by the interactive Q&A cell (Cell 3). `%pip` is an
# IPython magic, so this cell only runs inside a Jupyter/IPython kernel.
# NOTE(review): the install is unpinned and re-runs every time this cell is
# executed; consider pinning a version.
%pip install ipywidgets

# Add current dir to path so we can import app.py
# Path.cwd() is the kernel's working directory, which is normally (but not
# necessarily) the folder that contains this notebook.
nb_dir = Path.cwd()
if str(nb_dir) not in sys.path:
    sys.path.insert(0, str(nb_dir))

# Imported only after sys.path has been adjusted above; the order matters.
from app import (
    build_or_update_index, check_storage, verify_coverage, STORAGE_DIR
)

# STORAGE_DIR comes from app.py. The recorded run printed a `storage` folder
# here, so this is presumably where the index is persisted rather than where
# the source documents are read from — TODO confirm against app.py.
print("Building/updating index from:", STORAGE_DIR)
index = build_or_update_index(STORAGE_DIR)

print("\nQuick checks...")
# In the recorded run this reported that the storage folder was found and the
# total number of vectors in the FAISS index.
check_storage()
print("\nVerifying coverage...")
# In the recorded run this reported the number of files in data, the number of
# files recorded in storage (manifest hits), and the FAISS vector count.
coverage = verify_coverage(verbose=True)
print("\nDone.")


Note: you may need to restart the kernel to use updated packages.
Building/updating index from: D:\attarat\project1\storage
Creating fresh storage...
Using embedding dimension: 384
Storage initialized with empty FAISS index.
⚠️ Creating new index...


Parsing nodes:   0%|          | 0/585 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/932 [00:00<?, ?it/s]

2025-08-23 15:49:32,970 - INFO - Loading llama_index.vector_stores.faiss.base from D:\attarat\project1\storage\default__vector_store.json.
2025-08-23 15:49:33,007 - INFO - Loading llama_index.vector_stores.faiss.base from D:\attarat\project1\storage\default__vector_store.json.



Processing 585 documents...
✓ Index built and persisted.

Quick checks...
OK: Storage folder found.
Total vectors in FAISS index: 932

Verifying coverage...
Files in data: 4
Files recorded in storage (manifest hits): 4
FAISS vectors: 932

Done.


In [16]:

# === Cell 2: Ask (programmatic) ===
from app import ask_one

def ask(query: str,
        top_k: int = 6,
        restrict: str | None = None,   # None | "excel" | "csv" | "unstructured" | "all"
        file: str | None = None,
        sheet: str | None = None,
        include_sources: bool = True):
    """
    Ask the indexed data. Use `restrict`, `file`, `sheet` to narrow the scope.
    - restrict: None | "excel" | "csv" | "unstructured" | "all"
    - file:     target file name (e.g., 'plant.xlsx' or 'report.pdf')
    - sheet:    Excel sheet name (only for Excel queries)
    """
    return ask_one(
        query=query,
        top_k=top_k,
        include_sources=include_sources,
        restrict=restrict,
        file=file,
        sheet=sheet,
    )

# Example usage (uncomment to try after building index):
# print(ask("Summarize key findings."))
# print(ask("List top 5 rows by 'Power'", restrict="csv"))
# print(ask("Which rows have highest 'Voltage'?", restrict="excel", file="plant.xlsx", sheet="Sheet1"))


In [17]:

# === Cell 3: Interactive Q&A (widgets) ===
# Requires: ipywidgets
# If not installed: pip install ipywidgets && enable widgets extension in your Jupyter environment
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output

# --- Question box -----------------------------------------------------------
q_input = widgets.Textarea(
    value="",
    placeholder="Type your question here...",
    description="Question",
    layout=widgets.Layout(width="100%", height="90px"),
)

# --- Scope selector ---------------------------------------------------------
# (label shown in the dropdown, value handed to `ask` as `restrict`) pairs.
_restrict_choices = [
    ("Auto (None)", None),
    ("All", "all"),
    ("CSV only", "csv"),
    ("Excel only", "excel"),
    ("Unstructured only", "unstructured"),
]
restrict_dd = widgets.Dropdown(options=_restrict_choices, value=None, description="Restrict")

# --- Optional file / sheet filters (left blank means "no filter") -----------
file_input = widgets.Text(
    value="",
    placeholder="Exact filename (optional), e.g., report.pdf or plant.xlsx",
    description="File",
)
sheet_input = widgets.Text(
    value="",
    placeholder="Excel sheet name (optional)",
    description="Sheet",
)

# --- Top-K slider and the trigger button ------------------------------------
topk_slider = widgets.IntSlider(value=6, min=1, max=20, step=1, description="Top-K")
run_btn = widgets.Button(description="Ask", button_style="primary", tooltip="Run query")

# --- Assemble the form and the area where answers are rendered --------------
_form_children = [q_input, restrict_dd, file_input, sheet_input, topk_slider, run_btn]
controls = widgets.VBox(_form_children)
out = widgets.Output(layout={"border": "1px solid var(--jp-layout-color2)"})
display(controls, out)

def on_run_clicked(b):
    """Handle a click on the "Ask" button.

    Reads the current widget values, forwards them to `ask`, and renders the
    answer (or a readable error) as Markdown inside the `out` output area.

    Parameters:
    - b: the button instance supplied by `Button.on_click`; not used.

    Returns None. Exceptions raised while querying or rendering are caught and
    shown in the output area instead of propagating.
    """
    with out:
        clear_output()
        query = q_input.value.strip()
        if not query:
            display(Markdown("**Please enter a question.**"))
            return
        # Show a transient status so the output area is not left blank while
        # the query runs.
        display(Markdown("*Running query...*"))
        try:
            resp = ask(query=query,
                       top_k=topk_slider.value,
                       restrict=restrict_dd.value,
                       # Blank text boxes mean "no filter", passed on as None.
                       file=file_input.value.strip() or None,
                       sheet=sheet_input.value.strip() or None,
                       include_sources=True)
            # wait=True replaces the status line only once the result is
            # ready to be drawn, avoiding flicker.
            clear_output(wait=True)
            # resp is expected to be string or an object convertible to string.
            display(Markdown(f"### Result\n\n{resp}"))
        except Exception as e:
            # The broad catch is deliberate: any failure is reported in the UI
            # rather than as a traceback. The exception type is included
            # because str(e) alone can be empty or ambiguous (e.g. KeyError).
            clear_output(wait=True)
            display(Markdown(f"**Error:** `{type(e).__name__}: {e}`"))

run_btn.on_click(on_run_clicked)


VBox(children=(Textarea(value='', description='Question', layout=Layout(height='90px', width='100%'), placehol…

Output(layout=Layout(border_bottom='1px solid var(--jp-layout-color2)', border_left='1px solid var(--jp-layout…