In [1]:
%load_ext autoreload
%autoreload 2

from rag.generation.local_llm import LocalLLM

# Instantiate the project's local LLM wrapper for the Qwen2.5-7B-Instruct
# checkpoint, requesting the CUDA device. The trailing bare name displays
# the object's repr as the cell output.
llm_model = LocalLLM(device="cuda", model_name="Qwen/Qwen2.5-7B-Instruct")
llm_model

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 88.93it/s]


<rag.generation.local_llm.LocalLLM at 0x7499ff608980>

In [2]:
# Display the model object held by the LocalLLM wrapper (its repr lists the module tree).
llm_model.model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 3584)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=3584, out_features=3584, bias=True)
          (k_proj): Linear(in_features=3584, out_features=512, bias=True)
          (v_proj): Linear(in_features=3584, out_features=512, bias=True)
          (o_proj): Linear(in_features=3584, out_features=3584, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (up_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (down_proj): Linear(in_features=18944, out_features=3584, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((3584,), eps=1e-06)
    (rotary_emb):

In [3]:
from rag.retrieval.vector_store import DocumentStore
from rag.embeddings import create_embedder
from rag.config import settings

# Build the embedder from the project settings; the document store below is
# constructed around it.
embedder = create_embedder(settings)

store = DocumentStore(embedder)
# Load the "bioasq-mini" collection into the store.
# NOTE(review): presumably this reads a pre-built index from disk — confirm
# where it is persisted and how it was created.
store.load("bioasq-mini")
# Bare name: show the store's repr as the cell output.
store

DocumentStore(total=40221)

In [4]:
# Example question used for the ad-hoc retrieval + generation cells below.
user_question = "Which disorders are associated with histamine receptors?"

# Retrieve the 20 best matches for the question.
# NOTE(review): the system prompt describes each hit as a dict with
# 'text', 'distance' and 'index' keys — confirm against DocumentStore.search.
retrieved_docs = store.search(user_question, k=20)

In [5]:
# Character count of the stringified results. The prompt embeds exactly this
# string form, so this is a rough check of how large the context will be.
len(str(retrieved_docs))

31942

In [11]:
# System prompt shared by the ad-hoc generation cell and get_rag_response().
# It must be defined before either of them runs.
#
# Citation tokens use a single format, [idx=<index>], throughout the prompt.
# The examples in the output-format and relevance-notes sections previously
# used "doc_id=", which contradicted the "Cite by index" rule and the keys
# actually present in CONTEXT ('text', 'distance', 'index').
system_prompt = """You are a domain-careful, passage-bound assistant. You will be given:

CONTEXT: a list of dictionaries, each with keys:

text (string) — the passage content (use this only).

distance (number) — smaller usually means more relevant. Use for tie-breaking, not as ground truth.

index (int) — unique identifier for citation (cite this).

QUESTION: the user’s query.

Ground Rules

Use Only the Provided Passages

All factual claims must come solely from text fields within CONTEXT.

Ignore any prior knowledge and external facts.

Cite by index

After each factual claim or at the end of a sentence/ bullet, cite like [idx=2025].

If multiple passages support a claim, cite the strongest 1–3 (prefer lower distance).

Be Concise, Direct, and Structured

Lead with a 1–3 sentence answer.

Use bullets for lists, mechanisms, pros/cons, steps, etc.

Include brief definitions only if needed to answer.

Rank & Filter Passages Sensibly

Prefer passages with lower distance, high topical match, and specific details.

De-duplicate overlapping content; don’t over-cite.

If passages conflict and can’t be resolved, state the disagreement and present both sides with citations.

No Fabrication

Do not invent numbers, dates, mechanisms, or terminology not explicitly present in text.

When Information Is Insufficient

Say: “I don’t have enough information in the provided passages to answer.”

Optionally list what’s missing (e.g., “mechanism”, “dates”, “definitions”).

Biomedical/Technical Care (if applicable)

Distinguish hypotheses vs. established findings when the wording is tentative.

Avoid over-generalization beyond what’s stated.

If species/setting (rodent vs. human, in vitro vs. in vivo) isn’t specified in the passages, don’t assume.

Working Steps (internal)

Parse CONTEXT; extract only text and note each item’s index and distance.

Identify passages most relevant to the QUESTION (favor lower distance).

Synthesize the answer strictly from the chosen passages.

Add minimal, targeted citations using [idx=…].

If conflicts remain unresolved, present both views briefly.

Input Format (exact)
======================== CONTEXT ================================
[{'text': '<passage 1 text>', 'distance': <float>, 'index': <int>},
 {'text': '<passage 2 text>', 'distance': <float>, 'index': <int>},
 ...
]
====================== QUESTION: <user question> ================

Output Format (default)
<Concise answer (1–3 sentences).>

- <Key point 1>. [idx=33378]
- <Key point 2>. [idx=37076, idx=33378]

Citations: [idx=33378], [idx=37076]

Insufficient Information
I don’t have enough information in the provided passages to answer. I would need <briefly state what is missing>.
Citations: —

Optional (if you want a quick audit trail)

After the answer (keep it short), you may append:

Relevance notes (brief):
- Used idx=37076 (lower distance, direct on H3 inverse agonism).
- Used idx=33378 (mechanistic distribution & function).
- Skipped idx=5234/2025/16517 (off-topic for QUESTION).
"""

# Assemble the user prompt one line at a time: a leading blank line, the
# CONTEXT banner, the stringified retrieval hits, a blank line, and the
# QUESTION banner carrying the user's question.
prompt = (
    "\n"
    "======================== CONTEXT ================================\n"
    f"{retrieved_docs}\n"
    "\n"
    f"====================== QUESTION: {user_question} ================"
)

# NOTE(review): this exploratory call samples at temperature=1.0, whereas
# get_rag_response() uses 0.1 — confirm the difference is intentional.
response = llm_model.generate(
    system_prompt=system_prompt,
    prompt=prompt,
    temperature=1.0,
    max_tokens=1024,
)

# print() keeps the answer's line breaks readable in the cell output.
print(response)

The histamine receptors are associated with various neurological and psychiatric disorders. Specifically:

- **Alzheimer's Disease**: The H3 receptor, in particular, has been implicated in cognitive functions and may contribute to cognitive impairment in conditions like Alzheimer's disease.
- **Attention Deficit Hyperactivity Disorder (ADHD)**: The H3 receptor is considered a relevant target for potential therapeutic interventions.
- **Neuropathic Pain**: The H3 receptor is involved in pain regulation and has been linked to conditions associated with neuropathic pain.
- **Schizophrenia**: The H3 receptor is a potential target for developing treatments for schizophrenia.
- **Tourette Syndrome**: The H3 receptor plays a role in regulating the cortico-basal ganglia circuitry, which is disrupted in Tourette syndrome. A mouse model with a mutation affecting histamine levels exhibited TS-like phenotypes.
- **Dementia**: Although the benefits of H3 inverse agonists for dementia are inconclusi

In [7]:
def get_rag_response(user_question, k=20, max_tokens=1024, temperature=0.1):
    """Answer ``user_question`` from passages retrieved out of the document store.

    Relies on three notebook globals that must already be defined:
    ``store`` (retrieval), ``llm_model`` (generation) and ``system_prompt``.

    Args:
        user_question: The question text; used both as the retrieval query
            and inside the QUESTION banner of the prompt.
        k: Number of passages requested from ``store.search``.
        max_tokens: Passed through to ``llm_model.generate``.
        temperature: Passed through to ``llm_model.generate``.

    Returns:
        Whatever ``llm_model.generate`` returns for the assembled prompt.
    """
    retrieved_docs = store.search(user_question, k=k)

    # Build the prompt from explicit line pieces. A triple-quoted f-string
    # written at function indentation would put four leading spaces on every
    # line, which no longer matches the "Input Format (exact)" section of the
    # system prompt or the prompt used by the ad-hoc cell.
    prompt = (
        "\n"
        "======================== CONTEXT ================================\n"
        f"{retrieved_docs}\n"
        "\n"
        f"====================== QUESTION: {user_question} ================"
    )

    return llm_model.generate(
        prompt=prompt,
        system_prompt=system_prompt,
        max_tokens=max_tokens,
        temperature=temperature,
    )

In [8]:
from datasets import load_dataset

# Load the "question-answer-passages" configuration of rag-mini-bioasq and keep
# only its 'test' split; the bare name below displays the dataset summary.
test_ds = load_dataset("rag-datasets/rag-mini-bioasq", "question-answer-passages")['test']
test_ds

Dataset({
    features: ['question', 'answer', 'relevant_passage_ids', 'id'],
    num_rows: 4719
})

In [9]:
# Pick one example (position 24) from the test split for a manual spot check.
row = test_ds[24]
row

{'question': 'Is Alu hypomethylation associated with breast cancer?',
 'answer': 'Yes, Alu elements were found to be hypomethylated in breast cancer, especially in the HER2-enriched subtype. Furthermore, Alu hypomethylation was identified as a late event during breast cancer progression, and in invasive breast cancer, tended to be associated with negative estrogen receptor status and poor disease-free survival of the patients.',
 'relevant_passage_ids': '[20682973, 24971511]',
 'id': 24}

In [10]:
# Run the full retrieve-then-generate pipeline on the sampled question so the
# answer can be compared by eye with row['answer'] shown above.
response = get_rag_response(row['question'])
print(response)

- Alu hypomethylation is associated with breast cancer, particularly correlating with negative estrogen receptor (ER) status in invasive breast cancer (IBC). [idx=24971511]

Citations: [idx=24971511]

Relevance notes:
- Used idx=24971511 (directly states the association between Alu hypomethylation and breast cancer characteristics).
