In [1]:
from haystack.telemetry import tutorial_running
tutorial_running(35)

In [2]:
from datasets import load_dataset
from haystack import Document

dataset = load_dataset('vblagoje/PubMedQA_instruction', split='train')
dataset = dataset.select(range(1000))

all_documents = [Document(content=doc['context']) for doc in dataset]
all_questions = [doc['instruction'] for doc in dataset]
all_ground_truth_answers = [doc['response'] for doc in dataset]

In [3]:
from typing import List
from haystack import Pipeline
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.document_stores.types import DuplicatePolicy

document_store = InMemoryDocumentStore()

document_embedder = SentenceTransformersDocumentEmbedder(model='sentence-transformers/all-MiniLM-L6-v2')
document_writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE)

indexing = Pipeline()
indexing.add_component(instance=document_embedder, name='document_embedder')
indexing.add_component(instance=document_writer, name='document_writer')

indexing.connect('document_embedder.documents', 'document_writer.documents')

indexing.run({'document_embedder': {'documents': all_documents}})


Batches:   0%|          | 0/32 [00:00<?, ?it/s]

{'document_writer': {'documents_written': 1000}}

In [4]:
import os
from getpass import getpass
from haystack.components.builders import AnswerBuilder, PromptBuilder
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.generators import OpenAIGenerator
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever

if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")

template = """
You have to answer the following question based on the given context information only.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
"""

rag_pipeline = Pipeline()
rag_pipeline.add_component("query_embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"))
rag_pipeline.add_component("retriever", InMemoryEmbeddingRetriever(document_store, top_k=3))
rag_pipeline.add_component("prompt_builder", PromptBuilder(template=template))
rag_pipeline.add_component("generator", OpenAIGenerator(model="gpt-4o-mini"))
rag_pipeline.add_component("answer_builder", AnswerBuilder())

rag_pipeline.connect("query_embedder", "retriever.query_embedding")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "generator")
rag_pipeline.connect("generator.replies", "answer_builder.replies")
rag_pipeline.connect("generator.meta", "answer_builder.meta")
rag_pipeline.connect("retriever", "answer_builder.documents")

<haystack.core.pipeline.pipeline.Pipeline object at 0x000002A6A2B28DD0>
🚅 Components
  - query_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - generator: OpenAIGenerator
  - answer_builder: AnswerBuilder
🛤️ Connections
  - query_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - retriever.documents -> answer_builder.documents (List[Document])
  - prompt_builder.prompt -> generator.prompt (str)
  - generator.replies -> answer_builder.replies (List[str])
  - generator.meta -> answer_builder.meta (List[Dict[str, Any]])

In [5]:
question = "Do high levels of procalcitonin in the early phase after pediatric liver transplantation indicate poor postoperative outcome?"

response = rag_pipeline.run(
    {
        "query_embedder": {"text": question},
        "prompt_builder": {"question": question},
        "answer_builder": {"query": question},
    }
)
response["answer_builder"]["answers"][0].data

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

'Yes, high levels of procalcitonin (PCT) in the early phase after pediatric liver transplantation indicate a poor postoperative outcome. Patients with high PCT levels on postoperative day 2 experienced higher International Normalized Ratio values on postoperative day 5, suffered more frequently from primary graft non-function, had longer stays in the pediatric intensive care unit, and required more time on mechanical ventilation.'

In [6]:
import random

questions, ground_truth_answers, ground_truth_docs = zip(
  *random.sample(list(zip(all_questions, all_ground_truth_answers, all_documents)), 25)
)

In [8]:
rag_answers = []
retrieved_docs = []

for question in list(questions):
  response = rag_pipeline.run(
    {
      'query_embedder': {'text': question},
      'prompt_builder': {'question': question},
      'answer_builder': {'query': question}
    }
  )

  print(f"Question: {question}")  
  print("Answer from pipeline:")
  print(response["answer_builder"]["answers"][0].data)
  print("\n-----------------------------------\n")
  rag_answers.append(response["answer_builder"]["answers"][0].data)
  retrieved_docs.append(response["answer_builder"]["answers"][0].documents)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Are pre-transplant impedance measures of reflux associated with early allograft injury after lung transplantation?
Answer from pipeline:
Yes, pre-transplant impedance measures of reflux are associated with early allograft injury after lung transplantation. Specifically, prolonged bolus clearance, increased total distal reflux episodes, and increased total proximal reflux episodes were significantly linked to decreased time to early allograft injury.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does protein phosphatase 5 mediate corticosteroid insensitivity in airway smooth muscle in patients with severe asthma?
Answer from pipeline:
Yes, protein phosphatase 5 (PP5) mediates corticosteroid insensitivity in airway smooth muscle (ASM) in patients with severe asthma. Increased levels of PP5 in ASM cells from severe asthmatics were associated with reduced sensitivity to corticosteroids, and knockdown of PP5 restored the repressive action of fluticasone on chemokine production as well as its ability to induce GRα nuclear translocation and GRE-dependent GILZ expression.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does anemia on admission predict short- and long-term outcomes in patients with acute ischemic stroke?
Answer from pipeline:
Yes, anemia on admission predicts both short- and long-term outcomes in patients with acute ischemic stroke. Anemic patients showed higher mortality rates at 7 days, 3 months, and 12 months, which were associated with hemoglobin status among other factors. Additionally, lower hemoglobin levels were inversely associated with recurrent stroke and mortality throughout the 12-month follow-up.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Is high salt intake associated with a higher risk of cardiovascular events : a 7.2-year evaluation of a cohort of hypertensive patients?
Answer from pipeline:
Yes, high salt intake is associated with a higher risk of cardiovascular events in the evaluated cohort of hypertensive patients. The study found that higher urinary sodium excretion (UNa), which corresponds to higher dietary salt intake, was significantly associated with an increased risk of cardiovascular events, as indicated by the logistic regression analysis that identified 24 h UNa as an independent predictor of CV events. Additionally, patients with UNa above the median had a significantly worse CV event-free survival rate.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does chronic psychological stress in high-anxiety rats induce sustained bladder hyperalgesia?
Answer from pipeline:
Yes, chronic psychological stress in anxiety-prone rats induces sustained bladder hyperalgesia, as evidenced by the increased pain response to suprapubic stimulation and decreased response threshold to mechanical hindpaw stimulation observed in rats exposed to chronic water avoidance stress (WAS). This hyperalgesia developed by day 8 of the stress protocol and persisted for more than one month.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does over-expression of small ubiquitin-like modifier proteases 1 predict chemo-sensitivity and poor survival in non-small cell lung cancer?
Answer from pipeline:
Yes, over-expression of small ubiquitin-like modifier proteases 1 (SENP1) is associated with poorer survival in non-small cell lung cancer (NSCLC) patients. The study findings indicate that SENP1 over-expression is linked to higher rates of lymph node metastasis, advanced TNM stage, and increased recurrence or metastasis after chemotherapy. Thus, it can be inferred that SENP1 over-expression may predict poor survival outcomes in NSCLC. However, the context does not provide direct evidence that SENP1 over-expression predicts chemo-sensitivity specifically.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Are multiple pathways responsible for anti-inflammatory and cardiovascular activities of Hordeum vulgare L?
Answer from pipeline:
Yes, multiple pathways are responsible for the anti-inflammatory and cardiovascular activities of Hordeum vulgare L. The study indicates that Hordeum vulgare inhibits both the cyclooxygenase (COX) and lipoxygenase (LOX) pathways of arachidonic acid metabolism, which are key pathways involved in inflammation. Additionally, it elevates the activities of antioxidant enzymes such as superoxide dismutase (SOD) and glutathione peroxidase (GPx), further contributing to its anti-inflammatory and cardioprotective effects. These activities are distributed across various fractions of Hordeum vulgare, showcasing the complexity of its mechanisms.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does population structure from NOS genes correlate with geographical differences in coronary incidence across Europe?
Answer from pipeline:
Yes, the population structure estimates from NOS regions are closely correlated with coronary event rates and classical risk parameters among European populations, indicating a relationship between population structure and geographical differences in coronary incidence across Europe.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does unilateral Hypothalamus Inactivation prevent PTZ Kindling Development through Hippocampal Orexin Receptor 1 Modulation?
Answer from pipeline:
Yes, unilateral hypothalamus inactivation prevents PTZ kindling development through the modulation of hippocampal orexin receptor 1 (OX1R). The inactivation of the lateral hypothalamic area (LHA) resulted in decreased hippocampal glutamate content and reduced convulsive behavior induced by PTZ, indicating a role for orexin signaling in the process.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does diet-induced obesity prevent the development of acute traumatic coagulopathy?
Answer from pipeline:
Yes, diet-induced obesity appears to prevent the development of acute traumatic coagulopathy (ATC) following injury-induced hemorrhagic shock. In the study, obesity-prone (OP) rats did not show the same impairment in thrombelastography (TEG) parameters following shock as observed in control and obesity-resistant (OR) rats, suggesting that obesity may counteract the hypocoagulable state typically induced by acute traumatic coagulopathy.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Is circulating soluble LR11 , a novel marker of smooth muscle cell proliferation , enhanced after coronary stenting in response to vascular injury?
Answer from pipeline:
Yes, circulating soluble LR11 levels are enhanced after coronary stenting in response to vascular injury, as indicated by the significant increase in these levels on days 14 and 60 after the procedure.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Do central adenosine A1 and A2A receptors mediate the antinociceptive effects of neuropeptide S in the mouse formalin test?
Answer from pipeline:
Yes, central adenosine A1 and A2A receptors mediate the antinociceptive effects of neuropeptide S in the mouse formalin test, as their respective antagonists (DPCPX for A1 and ZM241385 for A2A) blocked the antinociceptive effects of NPS.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does astaxanthin prevent TGFβ1-induced pro-fibrogenic gene expression by inhibiting Smad3 activation in hepatic stellate cells?
Answer from pipeline:
Yes, astaxanthin prevents TGFβ1-induced pro-fibrogenic gene expression by inhibiting Smad3 activation in hepatic stellate cells. It was noted that ASTX significantly decreased TGFβ1-induced expression of α-smooth muscle actin (α-SMA) and procollagen type 1, alpha 1 (Col1A1) at both mRNA and protein levels. Additionally, ASTX attenuated TGFβ1-induced Smad3 phosphorylation and nuclear translocation, indicating its role in inhibiting Smad3 activation.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does early-life stress selectively affect gastrointestinal but not behavioral responses in a genetic model of brain-gut axis dysfunction?
Answer from pipeline:
Yes, early-life stress selectively affects gastrointestinal responses but not behavioral responses in the genetic model of brain-gut axis dysfunction, as indicated by the findings that maternal separation (MS) did not further influence anxiety- and depressive-like behaviors in the WKY animals, but it did lead to significantly increased colonic visceral hypersensitivity and altered colonic cholinergic sensitivity.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Do endothelial progenitor cells induce transplant arteriosclerosis via VEGFR-1/2 activity?
Answer from pipeline:
The context suggests that endothelial progenitor cells (EPCs) are involved in transplant arteriosclerosis (TA) following acute rejection after organ transplantation. Although it mentions changes in VEGFR-1, pVEGFR-1, VEGFR-2, and pVEGFR-2 levels in EPCs upon exposure to VEGF165 and the VEGFR inhibitor Vandetanib, it does not directly state that EPCs induce TA via VEGFR-1/2 activity. Instead, the study highlights the potential adverse effects of EPCs contributing to TA, implying a complex role rather than a straightforward induction. Thus, the answer to the question is not definitively confirmed within the provided context.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does nT-proBNP and Echocardiographic Parameters for Prediction of Cardiovascular Outcomes in Patients with CKD stage G2-G4?
Answer from pipeline:
Yes, both nT-proBNP and echocardiographic parameters predict cardiovascular outcomes in patients with CKD stage G2-G4. The study found that NT-proBNP remained an independent predictor for adverse cardiovascular events, while certain echocardiographic parameters did not. Additionally, the combination of NT-proBNP with clinical and various echocardiographic variables improved the reclassification of risk for adverse outcomes.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does spinal Cord Stimulation modulate Gene Expression in the Spinal Cord of an Animal Model of Peripheral Nerve Injury?
Answer from pipeline:
Yes, spinal cord stimulation (SCS) therapy modulates gene expression in the spinal cord of an animal model of peripheral nerve injury. The study found that SCS therapy influenced the expression of genes such as 5HT3ra, cFOS, and GABAbr1 in the spinal cord tissues of the rats.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Is hypothalamic damage associated with inflammatory markers and worse cognitive performance in obese subjects?
Answer from pipeline:
Yes, hypothalamic damage is associated with inflammatory markers and worse cognitive performance in obese subjects. The study indicated that patients with hypothalamic λ(1) < 1.072 (a marker of hypothalamic damage) had higher values of body mass index (BMI), fat mass, inflammatory markers, and lower scores on cognitive tests. This suggests a link between hypothalamic damage, inflammation, and cognitive dysfunction in obese individuals.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does common variable immunodeficiency in adults require reserved protocols for long-term follow-up?
Answer from pipeline:
Yes, common variable immunodeficiency (CVID) in adults requires reserved protocols for long-term follow-up, as indicated by the study aiming to establish follow-up protocols for adult patients with CVID in a recently founded clinic. The need for follow-up plans, detection and management of complications/comorbidities, and treatment results suggests that structured protocols are necessary for effective long-term management of CVID.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Do mesalazine and thymoquinone attenuate intestinal tumour development in Msh2 ( loxP/loxP ) Villin-Cre mice?
Answer from pipeline:
Yes, mesalazine and thymoquinone attenuate intestinal tumour development in Msh2(loxP/loxP) Villin-Cre mice. Mesalazine reduced tumour incidence from 94% to 69% and thymoquinone further reduced it to 56%. Additionally, the mean number of tumours was reduced by mesalazine from 3.1 to 1.4 and by thymoquinone to 1.1.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Is structural maintenance of chromosomes 4 a predictor of survival and a novel therapeutic target in colorectal cancer?
Answer from pipeline:
The context does not provide specific information regarding whether structural maintenance of chromosomes 4 (SMC-4) is a predictor of survival or a novel therapeutic target in colorectal cancer. It mentions that SMC-4 expression is significantly higher in colorectal cancer and is associated with various clinical stages and differentiation, suggesting its potential relevance in cancer biology. However, it does not explicitly state its role as a predictor of survival or its viability as a therapeutic target. Further studies would be needed to clarify these aspects.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Is quality of life in stabilized patients with schizophrenia mainly associated with resilience and self-esteem?
Answer from pipeline:
Yes, quality of life in stabilized patients with schizophrenia is correlated moderately with resilience and self-esteem, as indicated in the study.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Is altered perineal microbiome associated with vulvovaginitis and urinary tract infection in preadolescent girls?
Answer from pipeline:
Yes, altered perineal microbiome is associated with vulvovaginitis and urinary tract infections (UTIs) in preadolescent girls. The presence of vulvovaginitis significantly increases periurethral colonization with uropathogenic bacteria, which in turn is linked to a higher prevalence of UTIs in girls with vulvovaginitis compared to those without.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Does leukocyte concentration and composition in platelet-rich plasma ( PRP ) influence the growth factor and protease concentrations?
Answer from pipeline:
Yes, the leukocyte concentration and composition in platelet-rich plasma (PRP) influence the growth factor and protease concentrations. The study found that the leukocyte concentration positively correlated with platelet-derived growth factor-BB (PDGF-BB) and vascular endothelial growth factor (VEGF) concentrations, while it negatively correlated with fibroblast growth factor-basic (FGF-b). Additionally, the concentration of matrix metalloproteinase-9 (MMP-9) strongly correlated with the leukocyte concentration.

-----------------------------------



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question: Is progression of EGFR mutant lung adenocarcinoma driven by alveolar macrophages?
Answer from pipeline:
Yes, the progression of EGFR mutant lung adenocarcinoma is driven by alveolar macrophages. The study indicates that the progression and remission of the tumor are dependent on the expansion and contraction of alveolar macrophages, with local proliferation being the mechanism underlying macrophage expansion. Additionally, the depletion of alveolar macrophages in tumor-bearing mice resulted in a reduction of tumor burden, highlighting their critical role in the development of EGFR mutant adenocarcinoma.

-----------------------------------



In [9]:
from haystack.components.evaluators.document_mrr import DocumentMRREvaluator
from haystack.components.evaluators.faithfulness import FaithfulnessEvaluator
from haystack.components.evaluators.sas_evaluator import SASEvaluator

eval_pipeline = Pipeline()
eval_pipeline.add_component("doc_mrr_evaluator", DocumentMRREvaluator())
eval_pipeline.add_component("faithfulness", FaithfulnessEvaluator())
eval_pipeline.add_component("sas_evaluator", SASEvaluator(model="sentence-transformers/all-MiniLM-L6-v2"))

results = eval_pipeline.run(
    {
        "doc_mrr_evaluator": {
            "ground_truth_documents": list([d] for d in ground_truth_docs),
            "retrieved_documents": retrieved_docs,
        },
        "faithfulness": {
            "questions": list(questions),
            "contexts": list([d.content] for d in ground_truth_docs),
            "predicted_answers": rag_answers,
        },
        "sas_evaluator": {"predicted_answers": rag_answers, "ground_truth_answers": list(ground_truth_answers)},
    }
)

100%|██████████| 25/25 [01:00<00:00,  2.44s/it]


In [10]:
from haystack.evaluation.eval_run_result import EvaluationRunResult

inputs = {
    "question": list(questions),
    "contexts": list([d.content] for d in ground_truth_docs),
    "answer": list(ground_truth_answers),
    "predicted_answer": rag_answers,
}

evaluation_result = EvaluationRunResult(run_name="pubmed_rag_pipeline", inputs=inputs, results=results)
evaluation_result.score_report()

Unnamed: 0,metrics,score
0,doc_mrr_evaluator,1.0
1,faithfulness,1.0
2,sas_evaluator,0.708761


In [11]:
import pandas as pd

results_df = evaluation_result.to_pandas()
top_3 = results_df.nlargest(3, "sas_evaluator")
bottom_3 = results_df.nsmallest(3, "sas_evaluator")
pd.concat([top_3, bottom_3])

Unnamed: 0,question,contexts,answer,predicted_answer,doc_mrr_evaluator,faithfulness,sas_evaluator
11,Do central adenosine A1 and A2A receptors medi...,[The present study aimed to investigate the in...,The above findings suggest that: (i) NPS evoke...,"Yes, central adenosine A1 and A2A receptors me...",1.0,1.0,0.891792
15,Does nT-proBNP and Echocardiographic Parameter...,[Natriuretic peptides and echocardiographic pa...,Our data confirm NT-proBNP is an independent p...,"Yes, both nT-proBNP and echocardiographic para...",1.0,1.0,0.866158
0,Are pre-transplant impedance measures of reflu...,[Acid reflux has been associated with poorer o...,"Prolonged bolus clearance, increased total dis...","Yes, pre-transplant impedance measures of refl...",1.0,1.0,0.855975
6,Are multiple pathways responsible for anti-inf...,[Hordeum vulgare L. (HV or barley) is used by ...,These results suggest the likely mechanisms re...,"Yes, multiple pathways are responsible for the...",1.0,1.0,0.401589
23,Does leukocyte concentration and composition i...,[Platelet-rich plasma (PRP) therapy has become...,These findings demonstrate that leukocytes str...,"Yes, the leukocyte concentration and compositi...",1.0,1.0,0.529442
13,Does early-life stress selectively affect gast...,[Early-life stress and a genetic predispositio...,"Our data suggest that early-life stress, on th...","Yes, early-life stress selectively affects gas...",1.0,1.0,0.598352
