In [None]:
%pip install -U pip setuptools wheel

%pip install -U --prefer-binary \
    numpy PyMuPDF sentence-transformers faiss-cpu \
    fastapi uvicorn pydantic tiktoken \
    ipywidgets jupyterlab_widgets \
    requests beautifulsoup4

%pip install -U torch --index-url https://download.pytorch.org/whl/cpu

%pip install -U openai

%pip install -U sentence-transformers

# Sanity check: make sure the kernel sees faiss
import sys, platform, pkgutil
import importlib.util

# Report which interpreter the kernel is running so package mismatches are easy to spot.
print("Python:", sys.version)
print("Kernel executable:", sys.executable)
# importlib.util.find_spec() replaces pkgutil.find_loader(), which is deprecated
# since Python 3.12 and scheduled for removal in 3.14.
print("faiss importable? ->", importlib.util.find_spec("faiss") is not None)


In [None]:
!pip install sentence-transformers faiss-cpu pymupdf fastapi uvicorn pydantic numpy tiktoken hf_xet

import os, json, re
from typing import List, Dict, Tuple
import numpy as np
import fitz  # PyMuPDF
import faiss

# Folder layout: source PDFs are read from DATA_DIR; generated artifacts
# (FAISS index, chunk dump, reports) are written to INDEX_DIR.
DATA_DIR = "data/arxiv"
INDEX_DIR = "artifacts"

# Create both folders up front so later cells can read/write without extra checks.
for _folder in (DATA_DIR, INDEX_DIR):
    os.makedirs(_folder, exist_ok=True)


## Load PDFs and extract text

In [19]:
def extract_text_from_pdf(path: str) -> str:
    """Return the plain text of every page of the PDF at `path`, joined by newlines.

    The document is opened in a `with` block so it is closed as soon as
    extraction finishes, even if reading a page raises. Previously each PDF
    was opened and never closed.
    """
    with fitz.open(path) as doc:
        return "\n".join(page.get_text("text") for page in doc)

def load_corpus(pdf_dir: str = DATA_DIR) -> Dict[str, str]:
    """Extract the text of every PDF file located directly inside `pdf_dir`.

    Args:
        pdf_dir: Directory to scan (not recursive).

    Returns:
        Dict mapping each PDF file name to its extracted text.

    Files are visited in sorted name order. os.listdir() returns entries in
    arbitrary order, and downstream cells derive chunk ids from the iteration
    order of this dict, so sorting keeps those ids reproducible across runs.
    """
    corpus: Dict[str, str] = {}
    for name in sorted(os.listdir(pdf_dir)):
        fp = os.path.join(pdf_dir, name)
        # Skip anything that merely looks like a PDF by name (e.g. a directory).
        if name.lower().endswith(".pdf") and os.path.isfile(fp):
            corpus[name] = extract_text_from_pdf(fp)
    return corpus

corpus = load_corpus()

# Preview at most five file names; the trailing marker signals that more were loaded.
_names_preview = list(corpus.keys())[:5]
_more_marker = ' ...' if len(corpus) > 5 else ''
print(f"Loaded {len(corpus)} PDFs: {_names_preview}{_more_marker}")


Loaded 50 PDFs: ['2508.10507v1 - Multi-Sample Anti-Aliasing and Constrained Optimization for 3D Gaussian Splatting.pdf', '2508.10528v1 - Med-GLIP Advancing Medical Language-Image Pre-training with Large-scale Grounded Dataset.pdf', '2508.10530v1 - Diversity First Quality Later A Two-Stage Assumption for Language Model Alignment.pdf', '2508.10539v1 - Improving Value-based Process Verifier via Low-Cost Variance Reduction.pdf', '2508.10548v1 - Stabilizing Long-term Multi-turn Reinforcement Learning with Gated Rewards.pdf'] ...


## Chunking (token-based, ~512 tokens with overlap)


In [20]:
# Token-based chunking (~512 tokens) with fallback to character-based if tiktoken isn't available
from typing import List

try:
    import tiktoken
    _enc = tiktoken.get_encoding("cl100k_base")
except Exception:
    # tiktoken is missing or its encoding could not be loaded: use the character fallback.
    _enc = None


def _sliding_windows(length: int, size: int, overlap: int):
    """Yield (start, end) slices covering range(length) with `size`-wide windows sharing `overlap` items."""
    start = 0
    while start < length:
        end = min(start + size, length)
        yield start, end
        if end == length:
            break
        start = end - overlap  # advances by size - overlap, which callers guarantee is > 0


def chunk_by_tokens(text: str, max_tokens: int = 512, overlap_tokens: int = 64) -> List[str]:
    """
    Token-based sliding window chunker targeting ~512 tokens per chunk with ~64-token overlap.
    Falls back to character-based (~4 chars/token heuristic) if tiktoken isn't available.

    Args:
        text: Text to split.
        max_tokens: Maximum tokens per chunk (must be > 0).
        overlap_tokens: Tokens shared by consecutive chunks (0 <= overlap_tokens < max_tokens).

    Returns:
        The chunks in document order; an empty list for empty text.

    Raises:
        ValueError: If the window cannot advance (overlap_tokens >= max_tokens,
            max_tokens <= 0) or overlap_tokens is negative. These values
            previously caused an endless loop.
    """
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be positive, got {max_tokens}")
    if not 0 <= overlap_tokens < max_tokens:
        raise ValueError(
            f"overlap_tokens must satisfy 0 <= overlap_tokens < max_tokens, "
            f"got overlap_tokens={overlap_tokens}, max_tokens={max_tokens}"
        )

    if _enc is None:
        # Character fallback: collapse whitespace, then window at ~4 chars per token.
        clean = " ".join(text.split())
        return [
            clean[lo:hi]
            for lo, hi in _sliding_windows(len(clean), max_tokens * 4, overlap_tokens * 4)
        ]

    # disallowed_special=() encodes strings such as "<|endoftext|>" as ordinary text;
    # with the default setting encode() raises when the input contains them.
    tokens = _enc.encode(text, disallowed_special=())
    return [
        _enc.decode(tokens[lo:hi])
        for lo, hi in _sliding_windows(len(tokens), max_tokens, overlap_tokens)
    ]

# Build chunk list with metadata using token-based chunking
# Flatten the corpus into two parallel lists: docs[i] is one chunk of text and
# metadatas[i] records the PDF file it was cut from.
docs: List[str] = []
metadatas: List[Dict] = []

for fname, text in corpus.items():
    pieces = chunk_by_tokens(text, max_tokens=512, overlap_tokens=64)
    docs.extend(pieces)
    metadatas.extend({"source": fname} for _ in pieces)

print(f"Total chunks: {len(docs)} from {len(corpus)} PDFs")


Total chunks: 2055 from 50 PDFs


## Embeddings


In [21]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used by embed_texts() for both chunks and queries.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

def embed_texts(texts: List[str]) -> np.ndarray:
    """Encode `texts` with the module-level `model` and return the result as float32.

    normalize_embeddings=True is requested from the encoder so the vectors can
    be compared with FAISS inner-product search.
    """
    vectors = model.encode(
        texts,
        show_progress_bar=True,
        normalize_embeddings=True,
    )
    return np.asarray(vectors, dtype=np.float32)

# Embed every chunk once; later cells index `embs` with the same ids they use for `docs`.
embs = embed_texts(docs)
embs.shape  # last expression of the cell, so the shape is shown as its output


Batches:   0%|          | 0/65 [00:00<?, ?it/s]

(2055, 384)

## Build FAISS index & persist artifacts

In [22]:
# Refuse to build an empty index.
if not docs:
    raise RuntimeError("No chunks found. Add PDFs to data/arxiv/ and rerun from the top.")

# Flat inner-product index; on normalized vectors inner product ≈ cosine similarity.
dim = embs.shape[1]
index = faiss.IndexFlatIP(dim)
index.add(embs)

faiss_path = os.path.join(INDEX_DIR, "arxiv.index")
chunks_path = os.path.join(INDEX_DIR, "chunks.jsonl")

# Persist the vector index.
faiss.write_index(index, faiss_path)

# Persist the chunks (text + source) as JSONL, one record per line, so they can be reloaded later.
with open(chunks_path, "w", encoding="utf-8") as f:
    f.writelines(
        json.dumps({"text": chunk_text, "source": meta["source"]}, ensure_ascii=False) + "\n"
        for chunk_text, meta in zip(docs, metadatas)
    )

print(f"Saved index to {faiss_path} and chunks to {chunks_path}")


Saved index to artifacts\arxiv.index and chunks to artifacts\chunks.jsonl


## Create SQLite FTS DB

In [23]:
import sqlite3, json
from pathlib import Path

# Keep the keyword index next to the other artifacts written to INDEX_DIR.
DB_PATH = Path(INDEX_DIR) / "rag.db"
DB_PATH.parent.mkdir(parents=True, exist_ok=True)

con = sqlite3.connect(DB_PATH)
try:
    # Create the schema if needed and wipe rows left over from a previous run,
    # so re-running the cell rebuilds the tables from the current docs/metadatas.
    con.executescript("""
CREATE TABLE IF NOT EXISTS documents (
  doc_id   INTEGER PRIMARY KEY,
  source   TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS chunk_meta (
  chunk_id INTEGER PRIMARY KEY,
  doc_id   INTEGER NOT NULL,
  source   TEXT NOT NULL,
  FOREIGN KEY(doc_id) REFERENCES documents(doc_id)
);
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
  text,
  chunk_id UNINDEXED,
  doc_id   UNINDEXED,
  tokenize='porter'
);
DELETE FROM documents; DELETE FROM chunk_meta; DELETE FROM chunks_fts;
""")

    # Document ids are assigned explicitly (1..N in sorted source order) instead of
    # being read back from lastrowid, which allows bulk inserts below.
    src2id = {
        src: did
        for did, src in enumerate(sorted({m["source"] for m in metadatas}), start=1)
    }

    # One transaction for all inserts: committed on success, rolled back on any error.
    with con:
        con.executemany(
            "INSERT INTO documents(doc_id, source) VALUES (?,?)",
            [(did, src) for src, did in src2id.items()],
        )
        # chunk_id is the position of the chunk in `docs`; the search cells use the
        # same integer to index docs/metadatas.
        con.executemany(
            "INSERT INTO chunk_meta(chunk_id, doc_id, source) VALUES (?,?,?)",
            ((cid, src2id[m["source"]], m["source"]) for cid, m in enumerate(metadatas)),
        )
        con.executemany(
            "INSERT INTO chunks_fts(rowid, text, chunk_id, doc_id) VALUES (?,?,?,?)",
            ((cid, t, cid, src2id[m["source"]]) for cid, (t, m) in enumerate(zip(docs, metadatas))),
        )
finally:
    # Always release the file handle; a leaked connection can keep the DB locked on re-run.
    con.close()

print("SQLite FTS ready at", DB_PATH)


SQLite FTS ready at artifacts\rag.db


In [24]:
def load_index_and_chunks(index_dir: str = INDEX_DIR) -> Tuple[faiss.Index, List[str], List[Dict]]:
    """Reload the persisted FAISS index and chunk records from `index_dir`.

    Reads `arxiv.index` and `chunks.jsonl` and returns `(index, texts, metadatas)`,
    where `metadatas[i]` is `{"source": ...}` for `texts[i]`.
    """
    index_file = os.path.join(index_dir, "arxiv.index")
    chunks_file = os.path.join(index_dir, "chunks.jsonl")

    idx = faiss.read_index(index_file)
    with open(chunks_file, "r", encoding="utf-8") as f:
        records = [json.loads(line) for line in f]

    texts = [rec["text"] for rec in records]
    metas = [{"source": rec["source"]} for rec in records]
    return idx, texts, metas

# Example usage (uncomment to test reloading):
# index, docs, metadatas = load_index_and_chunks()
# print(len(docs), "chunks reloaded")


## Search helper

In [25]:
from pathlib import Path
import numpy as np
import sqlite3
from collections import defaultdict

# Artifact folder: reuse INDEX_DIR when an earlier cell defined it, otherwise default to "artifacts".
ART_DIR = Path(globals().get("INDEX_DIR", "artifacts"))
DB_PATH = ART_DIR / "rag.db"

# ----------------------------- Dense (FAISS / fallback) -----------------------------
def dense_search(query: str, k: int = 5):
    """Top-k by vector similarity (embeddings are normalized → dot == cosine).

    Args:
        query: Free-text query; embedded with embed_texts().
        k: Maximum number of hits; capped at len(docs).

    Returns:
        List of dicts with keys score (higher is better), text, source,
        chunk_id and algo="dense", in the order returned by the index.

    Raises:
        RuntimeError: If embed_texts() or the vector index has not been defined yet.
    """
    if 'embed_texts' not in globals():
        raise RuntimeError("embed_texts() is not defined. Run the embeddings cell first.")
    if 'index' not in globals():
        raise RuntimeError("Vector index not found. Build/reload the FAISS (or fallback) index.")

    kk = min(k, len(docs))
    if kk <= 0:
        return []  # nothing can be returned, so skip embedding the query

    q = embed_texts([query])
    D, I = index.search(q, kk)

    out = []
    for score, raw_id in zip(D[0], I[0]):
        idx = int(raw_id)
        if idx < 0:
            # FAISS pads with -1 when it has fewer results than requested; used as a
            # Python index it would silently return the last chunk.
            continue
        out.append({
            "score": float(score),     # higher is better
            "text":  docs[idx],
            "source": metadatas[idx]["source"],
            "chunk_id": idx,
            "algo": "dense"
        })
    return out

# Backward-compat alias:
search = dense_search

# ----------------------------- Keyword (SQLite FTS5 BM25) ---------------------------
def keyword_search(query: str, k: int = 5):
    """
    Full-text search over chunks via SQLite FTS5.
    Returns 'score' as a higher-is-better value (= -bm25), and includes 'kw_bm25'.

    `query` is passed to MATCH verbatim, so it must be valid FTS5 query syntax.
    A blank query returns [] instead of being sent to FTS5.
    If the bm25-ranked statement raises sqlite3.OperationalError, the same MATCH
    is retried without ranking; those hits get score 0.0 and kw_bm25 None.

    Note: a later cell of this notebook defines keyword_search again; whichever
    definition ran last is the one in effect.

    Raises:
        FileNotFoundError: If the SQLite database at DB_PATH does not exist.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Keyword index not found at {DB_PATH}. Run the SQLite FTS cell to create it.")
    if not query or not query.strip():
        return []

    ranked_sql = """
            SELECT c.rowid AS chunk_id, cm.source, c.text, bm25(chunks_fts) AS bm25
            FROM chunks_fts c
            JOIN chunk_meta cm ON cm.chunk_id = c.rowid
            WHERE chunks_fts MATCH ?
            ORDER BY bm25 ASC
            LIMIT ?
        """
    unranked_sql = """
            SELECT c.rowid AS chunk_id, cm.source, c.text
            FROM chunks_fts c
            JOIN chunk_meta cm ON cm.chunk_id = c.rowid
            WHERE chunks_fts MATCH ?
            LIMIT ?
        """

    con = sqlite3.connect(DB_PATH)
    con.row_factory = sqlite3.Row
    try:
        try:
            rows = con.execute(ranked_sql, (query, k)).fetchall()
            ranked = True
        except sqlite3.OperationalError:
            # Fallback if bm25() or fts5 extension is unavailable
            rows = con.execute(unranked_sql, (query, k)).fetchall()
            ranked = False
    finally:
        # Single close on every path (replaces the double close + bare except).
        con.close()

    out = []
    for r in rows:
        if ranked:
            bm25 = float(r["bm25"]) if r["bm25"] is not None else 0.0
            score = -bm25               # invert so higher is better
        else:
            bm25, score = None, 0.0
        out.append({
            "score": score,
            "kw_bm25": bm25,
            "text": r["text"],
            "source": r["source"],
            "chunk_id": int(r["chunk_id"]),
            "algo": "keyword"
        })
    return out

# ----------------------------- MMR (diversified dense) ------------------------------
def search_mmr(
    query: str,
    k: int = 5,
    fetch_k: int = 40,        # how many dense candidates to fetch before diversification
    lambda_mult: float = 0.6, # 1.0→pure relevance, 0.0→pure diversity
    per_source_cap: int = 1   # at most N chunks from the same PDF
):
    """MMR over dense candidates to promote diversity and reduce same-doc duplicates.

    Fetches up to `fetch_k` nearest chunks, then greedily picks up to `k` of them,
    each time maximizing
        lambda_mult * sim(query, c) - (1 - lambda_mult) * max sim(c, already selected)
    while skipping chunks whose source already reached `per_source_cap`.

    Returns:
        List of dicts (score = similarity to the query, text, source, chunk_id,
        algo="mmr") in selection order.

    Raises:
        RuntimeError: If embed_texts(), the vector index, or the chunk embedding
            matrix `embs` is not defined.
    """
    if 'embed_texts' not in globals():
        raise RuntimeError("embed_texts() is not defined.")
    if 'index' not in globals():
        raise RuntimeError("Vector index not found.")
    if 'embs' not in globals():
        # Candidate vectors are read from `embs`; fail with the same kind of message
        # as the other prerequisites instead of a bare NameError.
        raise RuntimeError("Chunk embeddings 'embs' not found. Run the embeddings cell first.")

    n = min(fetch_k, len(docs))
    if n <= 0 or k <= 0:
        return []

    # Encode query (embeddings normalized)
    q = embed_texts([query])[0]
    _, I = index.search(q[None, :], n)
    # FAISS pads with -1 when fewer than n results exist; drop those ids.
    cand_ids = [int(i) for i in I[0] if i >= 0]
    if not cand_ids:
        return []

    cand_embs = embs[cand_ids]      # (n, d)
    sim_q = cand_embs @ q           # (n,)
    cand_srcs = [metadatas[i]["source"] for i in cand_ids]

    selected = []
    used_per_src = defaultdict(int)
    taken = np.zeros(len(cand_ids), dtype=bool)

    while len(selected) < k:
        # Eligible = not yet chosen and source still under its cap.
        under_cap = np.array([used_per_src[s] < per_source_cap for s in cand_srcs], dtype=bool)
        eligible = ~taken & under_cap
        if not eligible.any():
            break

        if selected:
            penalty = (cand_embs @ cand_embs[selected].T).max(axis=1)   # (n,)
        else:
            penalty = np.zeros(len(cand_ids), dtype="float32")

        mmr = lambda_mult * sim_q - (1.0 - lambda_mult) * penalty
        j_best = int(np.argmax(np.where(eligible, mmr, -np.inf)))

        selected.append(j_best)
        taken[j_best] = True
        used_per_src[cand_srcs[j_best]] += 1

    return [{
        "score": float(sim_q[j]),
        "text": docs[cand_ids[j]],
        "source": cand_srcs[j],
        "chunk_id": int(cand_ids[j]),
        "algo": "mmr"
    } for j in selected]

# ----------------------------- Hybrid (dense + keyword) -----------------------------
def _minmax(arr):
    a = np.asarray(arr, dtype="float32")
    lo, hi = float(np.min(a)), float(np.max(a))
    if hi - lo < 1e-9:
        return np.ones_like(a) * 0.5
    return (a - lo) / (hi - lo)

def hybrid_search(
    query: str,
    k: int = 3,
    k_dense: int = 10,
    k_kw: int = 10,
    alpha: float = 0.6,    # weight on dense (0..1)
    use_mmr: bool = False  # set True to use diversified dense candidates
):
    """Blend normalized dense and keyword scores into a single ranking.

    Candidates are the union of the dense (or MMR) hits and the keyword hits.
    Each signal is min-max normalized over that union and combined as
        score = alpha * dense + (1 - alpha) * keyword.

    A chunk missing from the dense hits gets a raw dense score of 0.0. A chunk
    missing from the keyword hits (or returned without a bm25 value) gets a raw
    keyword score strictly below the worst real keyword hit. SQLite's bm25()
    values are negative, so the earlier `max_bad * 1.2` placeholder ended up
    ranking such chunks above the worst real hit.

    Returns:
        Up to `k` dicts (chunk_id, source, text, dense, kw, score, algo="hybrid"),
        best first; [] when neither retriever returns anything.
    """
    if use_mmr:
        d_hits = search_mmr(query, k=k_dense, fetch_k=max(40, 3*k_dense), lambda_mult=0.6, per_source_cap=1)
    else:
        d_hits = dense_search(query, k=k_dense)
    kw_hits = keyword_search(query, k=k_kw)

    by_id = {}
    for h in d_hits:
        entry = by_id.setdefault(h["chunk_id"], {"text": h["text"], "source": h["source"]})
        entry["dense"] = h["score"]

    for h in kw_hits:
        entry = by_id.setdefault(h["chunk_id"], {"text": h["text"], "source": h["source"]})
        entry["kw_raw"] = h.get("kw_bm25", -h["score"])  # raw bm25: smaller is better

    if not by_id:
        return []  # nothing retrieved; min-max over an empty set is undefined

    ids = list(by_id)
    dense_scores = [by_id[i].get("dense", 0.0) for i in ids]

    # Keyword: negate bm25 so higher is better, then place missing entries below the worst hit.
    kw_raw = [by_id[i].get("kw_raw") for i in ids]
    kw_found = [-x for x in kw_raw if x is not None]
    if kw_found:
        worst = min(kw_found)
        margin = 0.2 * abs(worst)
        if margin == 0.0:
            margin = 0.2
        missing_kw = worst - margin
    else:
        missing_kw = 0.0  # no keyword signal at all → every kw score normalizes to 0.5
    kw_scores = [(-x if x is not None else missing_kw) for x in kw_raw]

    dense_n = _minmax(dense_scores)
    kw_n    = _minmax(kw_scores)
    final   = alpha * dense_n + (1.0 - alpha) * kw_n

    merged = [{
        "chunk_id": i,
        "source": by_id[i]["source"],
        "text":   by_id[i]["text"],
        "dense":  float(dense_n[j]),
        "kw":     float(kw_n[j]),
        "score":  float(final[j]),     # higher is better
        "algo":   "hybrid"
    } for j, i in enumerate(ids)]

    merged.sort(key=lambda r: r["score"], reverse=True)
    return merged[:max(k, 0)]

# ----------------------------- Quick smoke tests (optional) -------------------------
# print(dense_search("what problems are studied?", k=3))
# print(keyword_search("dataset OR benchmark", k=3))
# print(search_mmr("limitations", k=3))
# print(hybrid_search("evaluation metrics", k=3, use_mmr=True))


## Keyword search with sanitized FTS5 queries (overrides the earlier `keyword_search`)

In [26]:
import sqlite3, re
from pathlib import Path

# SQLite FTS database location: under INDEX_DIR when that name is defined,
# otherwise the default "artifacts/rag.db".
if 'INDEX_DIR' in globals():
    DB_PATH = Path(INDEX_DIR) / "rag.db"
else:
    DB_PATH = Path("artifacts/rag.db")

def _fts_query_from_text(text: str) -> str:
    # keep only word tokens; quote each to avoid FTS operators/punct errors
    toks = re.findall(r"[A-Za-z0-9_]+", text.lower())
    # AND semantics by space-joining quoted tokens; works with tokenize='porter'
    return " ".join(f'"{t}"' for t in toks) if toks else ""

def keyword_search(query: str, k: int = 5):
    """Keyword retrieval over the FTS5 table using a sanitized query.

    The query is converted with _fts_query_from_text(). If that yields nothing,
    [] is returned without touching the database. Hits are ordered by bm25 and
    carry score = -bm25 (higher is better) plus the raw value in kw_bm25. If the
    ranked statement raises sqlite3.OperationalError, the same MATCH is run
    without ranking and hits get score 0.0 and kw_bm25 None.

    Raises:
        FileNotFoundError: If the SQLite database at DB_PATH does not exist.
    """
    if not DB_PATH.exists():
        raise FileNotFoundError(f"Keyword index not found at {DB_PATH}. Build the SQLite FTS DB first.")

    q_fts = _fts_query_from_text(query)
    if not q_fts:
        return []

    def _as_hit(row, bm25):
        # bm25 is None on the unranked fallback path.
        return {
            "score": 0.0 if bm25 is None else -bm25,     # invert so higher=better
            "kw_bm25": bm25,
            "text": row["text"],
            "source": row["source"],
            "chunk_id": int(row["chunk_id"]),
            "algo": "keyword"
        }

    ranked_sql = """
            SELECT c.rowid AS chunk_id, cm.source, c.text, bm25(chunks_fts) AS bm25
            FROM chunks_fts c
            JOIN chunk_meta cm ON cm.chunk_id = c.rowid
            WHERE chunks_fts MATCH ?
            ORDER BY bm25 ASC
            LIMIT ?
        """
    plain_sql = """
            SELECT c.rowid AS chunk_id, cm.source, c.text
            FROM chunks_fts c
            JOIN chunk_meta cm ON cm.chunk_id = c.rowid
            WHERE chunks_fts MATCH ?
            LIMIT ?
        """

    con = sqlite3.connect(DB_PATH)
    con.row_factory = sqlite3.Row
    try:
        try:
            # bm25() may not exist on very old SQLite builds; handled just below
            ranked_rows = con.execute(ranked_sql, (q_fts, k)).fetchall()
            return [_as_hit(r, float(r["bm25"])) for r in ranked_rows]
        except sqlite3.OperationalError:
            # Fallback when bm25() or fts5 module lacks bm25 ranking
            plain_rows = con.execute(plain_sql, (q_fts, k)).fetchall()
            return [_as_hit(r, None) for r in plain_rows]
    finally:
        con.close()


## Retrieval Report
Generate a markdown report with ≥5 queries and their top-3 retrieved passages.


In [27]:
import pathlib, datetime

def run_retrieval_report(queries, k=3, out_path=os.path.join(INDEX_DIR, "retrieval_report.md")):
    """
    Write a markdown report listing, for each query, its top-k passages from search().

    Each passage shows its rank, score, source file and the first 800 characters
    of its text with newlines flattened to spaces. The report is saved to
    `out_path` as UTF-8 and the path is printed.
    """
    generated_at = datetime.datetime.now().isoformat(timespec="seconds")
    report = [f"# Retrieval Report\n\nGenerated: {generated_at}\n\n"]

    for number, question in enumerate(queries, start=1):
        report.append(f"## Q{number}. {question}\n")
        for rank, hit in enumerate(search(question, k=k), start=1):
            excerpt = hit["text"][:800].replace("\n", " ")
            report.append(f"**Top {rank}** (score {hit['score']:.3f}) — *{hit['source']}*\n\n> {excerpt} …\n")
        report.append("\n---\n")

    pathlib.Path(out_path).write_text("\n".join(report), encoding="utf-8")
    print(f"Saved report to {out_path}")

# Example starter queries (customize for your paper set)
example_queries = [
    # Five broad, corpus-level questions, matching the "≥5 queries" the report section asks for.
    "What problem do these papers address?",
    "Summarize the main methodology used.",
    "What datasets are commonly used?",
    "What are the key contributions mentioned?",
    "What future work or limitations are discussed?"
]

# Uncomment to run:
# run_retrieval_report(example_queries, k=3)


## Simple RAG answer (OpenAI chat completion)


In [None]:
import os, textwrap
from typing import List, Dict, Optional
from openai import OpenAI


def build_prompt(question: str, passages):
    """Assemble the grounded-answer prompt sent to the chat model.

    The prompt consists of fixed instructions, the question, and a "Context:"
    section with one numbered entry per passage: "[i] (source)" followed by the
    passage text, stripped and cut to 1800 characters.
    """
    header = "\n".join((
        "You are a careful assistant. Answer ONLY using the context below.",
        "Cite sources like [1], [2]. If at least two sources are provided, use at least two distinct citations.",
        "If the answer is not covered, say you don't have enough information.",
        "",
        f"Question: {question}",
        "",
        "Context:",
    ))
    entries = [
        f"[{number}] ({passage['source']})\n{passage['text'].strip()[:1800]}\n"
        for number, passage in enumerate(passages, start=1)
    ]
    return "\n".join([header, *entries])

def _get_openai_client() -> OpenAI:
    """Create an OpenAI client from the OPENAI_API_KEY environment variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if key:
        return OpenAI(api_key=key)
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Run the setup cell to set it, "
        "or create the client with OpenAI(api_key='...')."
    )

def ask_llm(
    question: str,
    k: int = 3,
    model: str = "gpt-4o-mini",
    alpha: float = 0.6,       # weight for dense vs keyword in hybrid
    use_mmr: bool = True      # diversify dense candidates before merging
) -> Dict:
    """Retrieve passages for `question`, ask the chat model, print and return the answer.

    Retrieval uses hybrid_search() when that function is defined, otherwise the
    dense `search` alias. The check is an explicit lookup rather than
    `except NameError`, so a NameError raised inside retrieval is no longer
    mistaken for "hybrid search unavailable".

    Returns:
        {"question": ..., "answer": ..., "hits": [...]}. When nothing is
        retrieved the model is not called and the answer is "(no passages retrieved)".

    Raises:
        RuntimeError: If OPENAI_API_KEY is not set (from _get_openai_client()).
    """
    # Retrieve (hybrid if available; else fall back to dense 'search')
    if 'hybrid_search' in globals():
        hits = hybrid_search(
            question,
            k=k,
            k_dense=10,
            k_kw=10,
            alpha=alpha,
            use_mmr=use_mmr
        )
    else:
        hits = search(question, k=k)  # fallback to your original dense search

    if not hits:
        return {"question": question, "answer": "(no passages retrieved)", "hits": []}

    # Prompt
    prompt = build_prompt(question, hits)

    # Call OpenAI
    client = _get_openai_client()
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
    )
    # message.content is optional in the API response; treat None as an empty answer.
    answer = (resp.choices[0].message.content or "").strip()

    # Pretty print
    print("\n" + "="*80)
    print("Q:", question)
    print("-"*80)
    print(answer)
    print("-"*80)
    print("Sources:")
    for i, h in enumerate(hits, 1):
        # Dense-only fallback hits have no 'dense'/'kw' fields; show "-" for those.
        dense = f"{h['dense']:.3f}" if 'dense' in h else "-"
        kw = f"{h['kw']:.3f}" if 'kw' in h else "-"
        print(f"[{i}] {h['source']}  (score={h['score']:.3f}, dense={dense}, kw={kw})")
    print("="*80 + "\n")

    return {"question": question, "answer": answer, "hits": hits}

# ---- Ask 5 relevant questions about your corpus ----
five_questions = [
    "What problem do these papers most commonly address?",
    "Summarize the typical methodology or architecture described across the papers.",
    "Which datasets or benchmarks are most frequently mentioned?",
    "What key limitations or open challenges are discussed?",
    "What future directions or proposed improvements recur across the papers?"
]

# Executes immediately when the cell runs: one retrieval + one chat completion per
# question (ask_llm needs OPENAI_API_KEY). Each element is the dict returned by
# ask_llm: question, answer, hits.
results = [ask_llm(q, k=3) for q in five_questions]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Q: What problem do these papers most commonly address?
--------------------------------------------------------------------------------
The papers most commonly address issues related to the evaluation and understanding of complex systems, particularly in the fields of medical imaging and artificial intelligence. 

The first paper focuses on the development of a dataset for visual question answering in gastrointestinal imaging, emphasizing the complexity of questions and the clinical categorization of findings in medical images [1]. 

The second paper discusses the challenges of detecting harmful content online and the implications of misinformation, highlighting the need for better understanding and evaluation of AI-generated content [2]. 

The third paper examines the inconsistencies in explanation methods for AI models, proposing a probabilistic and spectral analysis to better understand these issues and suggesting preliminary solutions [3]. 

Overall, these works collectively addr

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Q: Summarize the typical methodology or architecture described across the papers.
--------------------------------------------------------------------------------
The typical methodology described in the papers involves a structured approach to design evaluation and AI system functionality. 

In the **Agentic Design Review System** outlined in [1], the methodology includes a **Structured Design Description (SDD)** module that generates design descriptions to guide the review process. This system employs a **meta agent** to coordinate both static and dynamic agents. Static agents evaluate fixed design attributes, while dynamic agents assess attributes contextualized to specific designs. The meta agent consolidates insights from these evaluations to provide a final rating and actionable feedback, fostering a structured critique process that balances subjective creativity with objective design principles.

In the context of AI systems, as discussed in [2], various methodologies are categ

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Q: Which datasets or benchmarks are most frequently mentioned?
--------------------------------------------------------------------------------
The most frequently mentioned datasets or benchmarks in the provided context are:

1. **Landmark-4K dataset** - A proposed dataset consisting of 185 high-quality landmark images covering 49 categories, used for evaluating image super-resolution methods [1].
2. **CUFED5 dataset** - A testing dataset consisting of 126 image pairs, utilized in the evaluation of super-resolution methods [1].
3. **WR-SR dataset** - A dataset with 80 images, also used for evaluating super-resolution techniques [1].
4. **UniBench300** - A unified benchmark that incorporates multi-task data for visual object tracking and benchmarking [2].
5. **LasHeR, VisEvent, and DepthTrack** - Datasets used for training and evaluation in the context of unified tracking methods [2].
6. **CIFAR-100 and ImageNet-R** - Datasets used for efficiency evaluations in federated learning [3].

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Q: What key limitations or open challenges are discussed?
--------------------------------------------------------------------------------
The key limitations or open challenges discussed include:

1. The generalization gap across different skills in reasoning tasks, where advanced reasoning capabilities are a significant differentiator among models, particularly in complex reasoning tasks compared to more direct perceptual tasks [3].
2. The brittleness of instruction-following capabilities in models, which can be context-dependent and lead to failures in specific tasks despite strong performance in others [3].

These challenges highlight the need for improvements in reasoning and instruction adherence in large language models.
--------------------------------------------------------------------------------
Sources:
[1] 2508.10751v1 - Passk Training for Adaptively Balancing Exploration and Exploitation of Large Reasoning Models.pdf  (score=0.800, dense=1.000, kw=0.500)
[2] 2508.10599v

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Q: What future directions or proposed improvements recur across the papers?
--------------------------------------------------------------------------------
The future directions and proposed improvements across the papers include:

1. **Adaptive Unmasking/Remasking Strategies**: The use of adaptive strategies like remasking in diffusion language models is highlighted as a way to enhance efficiency and quality. This approach allows for a trade-off between compute resources and output quality, which is essential for improving model performance during inference [1].

2. **Guidance Techniques**: The implementation of guidance techniques, such as classifier-free guidance, is emphasized as a method to improve the quality of generated outputs by steering the generative process towards desired attributes. This technique has become foundational in various text-to-image systems and is being adopted for prompt-controlled generation in diffusion language models [1].

3. **Transparency and Explai

## FastAPI endpoints (default top-3)
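Run with: `uvicorn app:app --reload --port 8000` (this assumes the cell below, together with the retrieval helpers it calls, has been exported to a module named `app.py`; the notebook itself does not create that file).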


In [None]:
from fastapi import FastAPI
from pydantic import BaseModel

# Application object; the route decorator below registers endpoints on it.
app = FastAPI()

class Query(BaseModel):
    # Request body of POST /hybrid_search.
    query: str          # search text, passed to hybrid_search() as the query
    k: int = 3          # number of results requested (default top-3)
    alpha: float = 0.6  # forwarded as hybrid_search's `alpha` (weight on the dense score)

@app.post("/hybrid_search")
def do_hybrid(q: Query):
    # Run hybrid retrieval with fixed candidate pool sizes (10 dense, 10 keyword).
    hits = hybrid_search(q.query, k=q.k, k_dense=10, k_kw=10, alpha=q.alpha)
    # Return each hit with its text cut to 500 characters to keep the payload small.
    trimmed = [{**hit, "text": hit["text"][:500]} for hit in hits]
    return {"results": trimmed}
