<a href="https://colab.research.google.com/github/Zumoari/Hybrid-RAG-Healthcare/blob/main/BuildingAHybridExtractiveRAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Building a hybrid extractive RAG for large scale healthcare literature
This Google Colaboratory notebook provides the code for the RAG system described in the paper named above.

The code contains the hybrid RAG architecture with the indexing pipeline and querying pipeline.

This RAG architecture uses the free Mistral API https://mistral.ai/ as LLM. An API key for the Mistral LLM is **not** provided.

To use another LLM, please refer to the Haystack documentation: https://docs.haystack.deepset.ai/docs/generators.


Code for document preprocessing is not included. For further clarification, please read the annotated code below.

In [None]:
# Install libraries

%%capture
!pip install haystack-ai
!pip install "datasets>=2.6.1"
!pip install "sentence-transformers>=2.2.0"
!pip install firebase-admin
!pip install bitsandbytes
!pip install onnxruntime
!pip install qdrant-haystack
!pip install fastembed-haystack
!pip install mistral-haystack
# Due to some issues we used these versions for openai and httpx
# The issue may be resloved in newer versions
# Here is a community blog where the issue is discussed
# https://community.openai.com/t/error-with-openai-1-56-0-client-init-got-an-unexpected-keyword-argument-proxies/1040332
!pip install openai==1.55.3 httpx==0.27.2 --force-reinstall

In [None]:
# Import libraries

import firebase_admin
from firebase_admin import credentials, firestore

from haystack import Document
from haystack import component
from haystack.components.writers import DocumentWriter
from haystack.dataclasses import Document
from typing import List

from haystack.dataclasses import ChatMessage
from haystack.utils import Secret
from haystack import Pipeline
from haystack_integrations.components.generators.mistral import MistralChatGenerator
from haystack.components.generators.utils import print_streaming_chunk
from haystack.dataclasses import ChatMessage
from haystack.components.builders import ChatPromptBuilder
from haystack.components.preprocessors import DocumentSplitter

from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
from haystack_integrations.components.retrievers.qdrant import QdrantHybridRetriever
from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
from haystack.document_stores.types import DuplicatePolicy
from haystack_integrations.components.embedders.fastembed import (
	FastembedTextEmbedder,
	FastembedDocumentEmbedder,
	FastembedSparseTextEmbedder,
	FastembedSparseDocumentEmbedder
)
from jinja2 import Template
import json

In [None]:
# Preprocess your source documents in this cell.
# Before the conversion below runs, "documents" must be an iterable of dictionaries
# (e.g. a list), each providing the following keys:
#   content: all the text of the document that should be used for retrieval
#   meta:    the metadata of the document; which information to include is up to you
# If you change what is stored in meta, please customize the CustomPostProcessor component accordingly.
# See the Haystack documentation for details on the Document data class:
# https://docs.haystack.deepset.ai/docs/data-classes#document

# Turn every raw dictionary into a Haystack Document (rebinds "documents").
documents = [
    Document(content=raw["content"], meta=raw["meta"])
    for raw in documents
]

# Sanity check: make sure this number equals the number of your source documents.
len(documents)

In [None]:
# Optional: split the documents into smaller, sentence-based chunks.
# split_length / split_overlap are counted in sentences because split_by="sentence".
splitter = DocumentSplitter(split_by="sentence", split_length=20, split_overlap=2)

# The chunks are read from the "documents" key of the splitter output.
result = splitter.run(documents=documents)
docs = result["documents"]

print(f"Processed {len(result['documents'])} document splits.")

In [None]:
# Creation of the document store

# Qdrant document store kept under the given local path.
document_store = QdrantDocumentStore(
    path="/content/qdrant/storage_local",
    use_sparse_embeddings=True,  # Needed for the hybrid (dense + sparse) RAG
    embedding_dim=384,  # Has to be 384 for the hybrid version
    recreate_index=True,  # Set to False if a Qdrant Document Store already exists
)

In [None]:
# Prompt template

# The prompt builder fills in the question and the retrieved documents at query time.
# Keep the following placeholders exactly as they are; otherwise the prompt builder cannot insert the question and the documents:
# {{ question }}
# AND
# {% for document in retriever_documents %}
#     [ Document :
#     ID: {{ document.id }}
#     Score: {{ document.score }}
#     Meta: {{ document.meta }}
#     Content: {{ document.content }}
#
#     ]
#
# {% endfor %}
# You may change which information is rendered inside the for loop. See the "Document" data class for the available fields: https://docs.haystack.deepset.ai/docs/data-classes#document

template = """
Question: {{ question }}

Answer the question **as clearly and concisely as possible**, only based on the following provided papers:

# General Context from Retriever:
{% for document in retriever_documents %}
    [ Document :
    ID: {{ document.id }}
    Score: {{ document.score }}
    Meta: {{ document.meta }}
    Content: {{ document.content }}

    ]

{% endfor %}


Answer:
"""

In [None]:
# Mistral chat generator used as the LLM of the querying pipeline.
# The API key is read from the MISTRAL_API_KEY environment variable so that no
# secret has to be pasted into the notebook. Set the variable before running
# this cell, e.g.:
#   import os; os.environ["MISTRAL_API_KEY"] = "<your key>"
MistralGenerator = MistralChatGenerator(
    api_key=Secret.from_env_var("MISTRAL_API_KEY"),
    model="mistral-large-latest",  # Replace with your desired model name
    streaming_callback=print_streaming_chunk,  # Optional streaming callback - writes the model output in the console
    generation_kwargs={
        "temperature": 0.1,  # We found that a temperature of 0.1 provides good results. Feel free to change it.
    },
)

In [None]:
# Custom Chat Prompt Builder

# This component currently only inserts the question and the retrieved
# documents into the prompt template. Feel free to extend it.

@component
class CustomChatPromptBuilder:
    """Render a Jinja2 prompt template into a single user chat message."""

    def __init__(self, template: List[ChatMessage]):
        """
        Compile the prompt template.

        :param template: List of chat messages; only the text of the first
            message is used as the Jinja2 template source.
        """
        first_message = template[0]
        # Newer Haystack releases expose the message text as `text` and no
        # longer offer `content`; older releases may only have `content`.
        try:
            template_text = first_message.text
        except AttributeError:
            template_text = first_message.content
        self.template = Template(template_text.strip())

    @component.output_types(prompt=List[ChatMessage])
    def run(self, question: str, retriever_documents: List[Document]):
        """
        Fill the template with the question and the retrieved documents.

        :param question: The user question inserted for `{{ question }}`.
        :param retriever_documents: Documents made available to the template
            as `retriever_documents`.
        :returns: Dict with key "prompt" holding a one-element list with the
            rendered prompt as a user message.
        """
        prompt = self.template.render(
            question=question,
            retriever_documents=retriever_documents,
        )
        # Passed positionally because the name of the first parameter of
        # `from_user` differs between Haystack versions.
        return {"prompt": [ChatMessage.from_user(prompt)]}


In [None]:
# Custom Post Processor

# Creates a string in JSON format of the LLM output and all the sources used to create the answer.
# Please modify the per-source dictionary below depending on the information in your documents.

@component
class CustomPostProcessor:
    """Bundle the LLM answer and the retrieved source documents into one JSON string."""

    @component.output_types(text=str)
    def run(self, answerLLM: List[ChatMessage], sources: List[Document]):
        """
        Serialize the first LLM reply together with its sources.

        :param answerLLM: Replies of the LLM; only the first reply is used.
        :param sources: Documents handed to the LLM; each one is listed with a
            1-based "ref" number, its id, score, meta and content.
        :returns: Dict with key "text" holding the JSON string
            (keys "answer" and "sources").
        """
        first_reply = answerLLM[0]
        # Newer Haystack releases expose the message text as `text` and no
        # longer offer `content`; older releases may only have `content`.
        try:
            answer = first_reply.text
        except AttributeError:
            answer = first_reply.content

        source_list = [
            {
                "ref": ref,
                "id": source.id,
                "score": source.score,
                "meta": source.meta,
                "content": source.content,
            }
            for ref, source in enumerate(sources, start=1)
        ]

        result = {"answer": answer, "sources": source_list}
        return {"text": json.dumps(result, ensure_ascii=False, indent=2)}

In [None]:
# Creation of the indexing pipeline

indexing = Pipeline()

# Components: sparse embedder -> dense embedder -> writer into the document store
indexing.add_component(
    "sparse_doc_embedder",
    FastembedSparseDocumentEmbedder(model="prithvida/Splade_PP_en_v1"),
)
indexing.add_component(
    "dense_doc_embedder",
    FastembedDocumentEmbedder(model="BAAI/bge-small-en-v1.5"),
)
indexing.add_component(
    "writer",
    DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE),
)

# Connections: the documents are handed from one component to the next
indexing.connect("sparse_doc_embedder.documents", "dense_doc_embedder.documents")
indexing.connect("dense_doc_embedder.documents", "writer.documents")

In [None]:
# Run the indexing pipeline

# Indexing the source documents only has to be done once.
# Depending on the number of document chunks this may take a while.

indexing.run(data={"sparse_doc_embedder": {"documents": docs}})

In [None]:
# Creation of the querying pipeline

querying = Pipeline()

# Components: query embedders, hybrid retriever, prompt builder, LLM and post processor
querying.add_component(
    "sparse_text_embedder",
    FastembedSparseTextEmbedder(model="prithvida/Splade_PP_en_v1"),
)
querying.add_component(
    "dense_text_embedder",
    FastembedTextEmbedder(model="BAAI/bge-small-en-v1.5", prefix=""),
)
querying.add_component(
    "retriever",
    QdrantHybridRetriever(document_store=document_store),
)
querying.add_component(
    "prompt_builder",
    CustomChatPromptBuilder(template=[ChatMessage.from_user(template)]),
)
querying.add_component("llm", MistralGenerator)
querying.add_component("custom_processor", CustomPostProcessor())

# Connections
# Both query embeddings feed the hybrid retriever
querying.connect("sparse_text_embedder.sparse_embedding", "retriever.query_sparse_embedding")
querying.connect("dense_text_embedder.embedding", "retriever.query_embedding")
# The retrieved documents go into the prompt, the prompt goes to the LLM
querying.connect("retriever.documents", "prompt_builder.retriever_documents")
querying.connect("prompt_builder.prompt", "llm.messages")
# The post processor receives the retrieved documents and the LLM replies
querying.connect("retriever.documents", "custom_processor.sources")
querying.connect("llm.replies", "custom_processor.answerLLM")

In [None]:
# The input query to the RAG
# Replace the placeholder string with your own question; the same string is
# passed to both text embedders and to the prompt builder when the querying pipeline is run.
question = "PLEASE INSERT YOUR QUESTION HERE"

In [None]:
# Run the querying pipeline
# The question is fed to both embedders and to the prompt builder.
# top_k is the number of document chunks the LLM receives. Depending on your
# chunk size and the maximum input size of your LLM you can pass more or fewer.
rag_result = querying.run(
    data={
        "dense_text_embedder": {"text": question},
        "sparse_text_embedder": {"text": question},
        "prompt_builder": {"question": question},
        "retriever": {"top_k": 3},
    }
)

In [None]:
# You can see your result here. The post processor ("custom_processor") emits its JSON string
# under the output name "text"; you could additionally write it to a JSON file for better readability.
print(rag_result)