In [1]:
%%bash

# Pin Haystack to the release this notebook was recorded with (2.2.0, per the
# install log below). Later cells import HuggingFaceTGIGenerator, which is
# presumably not available in every newer release -- TODO confirm before
# bumping the pin.
pip install haystack-ai==2.2.0
# pymed provides the PubMed client used by the fetcher component.
pip install pymed

Collecting haystack-ai
  Downloading haystack_ai-2.2.0-py3-none-any.whl (345 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 345.2/345.2 kB 6.7 MB/s eta 0:00:00
Collecting lazy-imports (from haystack-ai)
  Downloading lazy_imports-0.3.1-py3-none-any.whl (12 kB)
Collecting openai>=1.1.0 (from haystack-ai)
  Downloading openai-1.31.1-py3-none-any.whl (324 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 324.1/324.1 kB 12.8 MB/s eta 0:00:00
Collecting posthog (from haystack-ai)
  Downloading posthog-3.5.0-py2.py3-none-any.whl (41 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 41.3/41.3 kB 5.6 MB/s eta 0:00:00
Collecting trafilatura (from haystack-ai)
  Downloading trafilatura-1.10.0-py3-none-any.whl (1.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 17.9 MB/s eta 0:00:00
Collecting httpx<1,>=0.23.0 (from openai>=1.1.0->haystack-ai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 75.6/75.6 kB 10.2 MB/s eta 0:00:00
Colle



In [5]:
from getpass import getpass

# Ask for the Hugging Face token interactively so it is not written into the
# notebook source; the value is passed to the generators in a later cell.
huggingface_token = getpass("Enter your Hugging Face api token:")

Enter your Hugging Face api token:··········


In [6]:
from pymed import PubMed
from typing import List
from haystack import component
from haystack import Document

# Module-level PubMed client shared by the PubMedFetcher component below.
# NOTE(review): the contact e-mail is hardcoded; presumably each user should
# supply their own address here -- confirm before sharing the notebook.
pubmed = PubMed(tool="Haystack2.0Prototype", email="singh.ayushk1128@gmail.com")

def documentize(article):
  """Convert one PubMed search result into a Haystack ``Document``.

  Args:
    article: An item yielded by ``pubmed.query``. Its ``abstract`` becomes the
      document content; ``title`` and ``keywords`` are copied into ``meta``.

  Returns:
    Document: ``content`` is the abstract, ``meta`` has the keys ``'title'``
    and ``'keywords'`` (``None`` when the result carries no keywords).
  """
  # Not every result type is guaranteed to expose ``keywords`` (presumably
  # book entries do not -- TODO confirm against pymed), so read it defensively
  # instead of letting an AttributeError abort the fetch.
  keywords = getattr(article, 'keywords', None)
  return Document(content=article.abstract, meta={'title': article.title, 'keywords': keywords})

@component
class PubMedFetcher():
  """Haystack component that fetches PubMed articles for generated keywords.

  Relies on the module-level ``pubmed`` client and the ``documentize`` helper.
  """

  @component.output_types(articles=List[Document])
  def run(self, queries: list[str]):
    """Fetch at most one article per keyword line.

    Args:
      queries: Replies coming from the keyword generator. Only the first
        reply is used; it is split on newlines and every non-blank line is
        sent to PubMed as its own query.

    Returns:
      dict: ``{'articles': [...]}`` holding one ``Document`` per article
      found. The list is empty when ``queries`` is empty or nothing could be
      fetched.
    """
    articles = []

    # No reply at all means there is nothing to search for.
    if not queries:
      return {'articles': articles}

    # Strip every line and drop blank ones so that no empty search term is
    # ever sent to PubMed.
    cleaned_queries = [line.strip() for line in queries[0].split('\n') if line.strip()]

    for query in cleaned_queries:
      # Handle failures per query: one bad lookup must not discard the
      # results of the remaining keywords.
      try:
        response = pubmed.query(query, max_results=1)
        documents = [documentize(article) for article in response]
        articles.extend(documents)
      except Exception as e:
        print(e)
        print(f"Couldn't fetch articles for query: {query!r}")

    return {'articles': articles}

In [7]:
from haystack.components.generators import HuggingFaceTGIGenerator
from haystack.utils import Secret


# Generator that receives the keyword prompt; its replies feed the PubMed
# fetcher (see the pipeline connections in the next cell).
keyword_llm = HuggingFaceTGIGenerator(model="mistralai/Mixtral-8x7B-Instruct-v0.1", token=Secret.from_token(huggingface_token))
keyword_llm.warm_up()

# Second, separate generator instance (same model and token) that receives the
# final prompt built from the fetched articles.
llm = HuggingFaceTGIGenerator(model="mistralai/Mixtral-8x7B-Instruct-v0.1", token=Secret.from_token(huggingface_token))
llm.warm_up()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [8]:
from haystack import Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder

# Few-shot prompt asking the model for 3 PubMed search keywords, one per line.
# `{{ question }}` is filled in by the PromptBuilder created from this template.
keyword_prompt_template = """
Your task is to convert the following question into 3 keywords that can be used to find relevant medical research papers on PubMed.
Here is an examples:
question: "What are the latest treatments for major depressive disorder?"
keywords:
Antidepressive Agents
Depressive Disorder, Major
Treatment-Resistant depression
---
question: {{ question }}
keywords:
"""

# Answer prompt: the question followed by each fetched article's abstract,
# keywords and title. `question` and `articles` are the template variables.
# NOTE(review): the template ends after the article list without an explicit
# answer cue, and the recorded run at the end of the notebook shows an article
# abstract instead of an answer -- consider whether a cue is needed.
prompt_template = """
Answer the question truthfully based on the given documents.
If the documents don't contain an answer, use your existing knowledge base.

q: {{ question }}
Articles:
{% for article in articles %}
  {{article.content}}
  keywords: {{article.meta['keywords']}}
  title: {{article.meta['title']}}
{% endfor %}

"""
# Build the components that were not already created in earlier cells.
fetcher = PubMedFetcher()
keyword_prompt_builder = PromptBuilder(template=keyword_prompt_template)
prompt_builder = PromptBuilder(template=prompt_template)

pipe = Pipeline()

# Register each component under the name the connections below refer to.
for component_name, component_instance in (
    ("keyword_prompt_builder", keyword_prompt_builder),
    ("keyword_llm", keyword_llm),
    ("pubmed_fetcher", fetcher),
    ("prompt_builder", prompt_builder),
    ("llm", llm),
):
  pipe.add_component(component_name, component_instance)

# Keyword stage: question -> keyword prompt -> keyword LLM -> PubMed lookup.
pipe.connect("keyword_prompt_builder.prompt", "keyword_llm.prompt")
pipe.connect("keyword_llm.replies", "pubmed_fetcher.queries")

# Answer stage: fetched articles -> answer prompt -> answering LLM.
pipe.connect("pubmed_fetcher.articles", "prompt_builder.articles")
# Kept as the last bare expression so the cell still displays the pipeline.
pipe.connect("prompt_builder.prompt", "llm.prompt")


<haystack.core.pipeline.pipeline.Pipeline object at 0x784c48418d30>
🚅 Components
  - keyword_prompt_builder: PromptBuilder
  - keyword_llm: HuggingFaceTGIGenerator
  - pubmed_fetcher: PubMedFetcher
  - prompt_builder: PromptBuilder
  - llm: HuggingFaceTGIGenerator
🛤️ Connections
  - keyword_prompt_builder.prompt -> keyword_llm.prompt (str)
  - keyword_llm.replies -> pubmed_fetcher.queries (List[str])
  - pubmed_fetcher.articles -> prompt_builder.articles (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [9]:
from IPython.display import display, HTML

def ask(question, max_new_tokens=500):
  """Run the pipeline for one question and print the question and the answer.

  Args:
    question: Question text; it is passed to both prompt builders.
    max_new_tokens: Generation budget forwarded to the answering LLM via
      ``generation_kwargs``. Defaults to 500.

  Returns:
    None. The question and the first reply of the ``llm`` component are
    printed; a short notice is printed instead when no reply came back.
  """
  output = pipe.run(data={
      "keyword_prompt_builder": {"question": question},
      "prompt_builder": {"question": question},
      "llm": {"generation_kwargs": {"max_new_tokens": max_new_tokens}},
  })
  replies = output['llm']['replies']

  print(question)
  # Guard against an empty reply list instead of failing with an IndexError.
  if replies:
    print(replies[0])
  else:
    print("No answer was generated.")


In [10]:
# Example run: prints the question followed by the generated answer.
ask("How are mRNA vaccines being used for cancer treatment?")

How are mRNA vaccines being used for cancer treatment?
  The primary symptom of visual snow syndrome (VSS) is the unremitting perception of small, flickering dots covering the visual field. VSS is a serious but poorly understood condition that can interfere with daily tasks. Several studies have provided qualitative data about the appearance of visual snow, but methods to quantify the symptom are lacking. Here, we developed a task in which participants with VSS adjusted parameters of simulated visual snow on a computer monitor until the simulation matched their internal visual snow. On each trial, participants (n = 31 with VSS) modified the size, density, update speed, and contrast of the simulation. Participants' settings were highly reliable across trials (intraclass correlation coefficients > 0.89), and they reported that the task was effective at stimulating their visual snow. On average, visual snow was very small (less than 2 arcmin in diameter), updated quickly (mean temporal fr