## **Build a Medical RAG Chatbot using the BioMistral Open-Source LLM**

In [None]:
# Mount Google Drive; later cells read the PDFs and the GGUF model from MyDrive.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
!pip install langchain sentence-transformers chromadb llama-cpp-python langchain_community pypdf

Collecting chromadb
  Downloading chromadb-0.6.2-py3-none-any.whl.metadata (6.8 kB)
Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.5.tar.gz (64.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.5/64.5 MB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting langchain_community
  Downloading langchain_community-0.3.14-py3-none-any.whl.metadata (2.9 kB)
Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.

In [None]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA, LLMChain

## Import the documents


In [None]:
# Read every PDF found in the Drive data folder into LangChain documents.
PDF_DIR = "/content/drive/MyDrive/BioMistral/Data"
loader = PyPDFDirectoryLoader(PDF_DIR)
docs = loader.load()

In [None]:
# Number of documents returned by the PDF directory loader.
len(docs)

95

In [None]:
# Inspect one loaded document (metadata plus page text) as a sanity check.
docs[5]

Document(metadata={'source': '/content/drive/MyDrive/BioMistral/Data/healthyheart.pdf', 'page': 5}, page_content='If you’re like many people, you may think of heart disease as a\nproblem that happens to other folks. “I feel fine,” you may think,\n“so I have nothing to worry about.” If you’re a woman, you may\nalso believe that being female protects you from heart disease.\nIf you’re a man, you may think you’re not old enough to have a\nserious heart condition.\nWrong on all counts. In the United States, heart disease is the #1\nkiller of both women and men. It affects many people at midlife, \nas well as in old age. It also can happen to those who “feel fine.”\nConsider these facts: \n■ Each year, 500,000 Americans die of heart disease, and approx-\nimately half of them are women.\n■ As early as age 45, a man’s risk of heart disease begins to rise \nsignificantly. For a woman, risk starts to increase at age 55.\n■ Fifty percent of men and 64 percent of women who die suddenly\nof heart 

# Chunking


In [None]:
# Split the loaded documents into small overlapping chunks for embedding.
CHUNK_SIZE = 300
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(docs)


In [None]:
# Number of chunks produced by the text splitter.
len(chunks)

585

In [None]:
# Inspect one chunk to check the split size and the metadata carried over from its document.
chunks[5]

Document(metadata={'source': '/content/drive/MyDrive/BioMistral/Data/healthyheart.pdf', 'page': 3}, page_content='What’s Your Risk? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5\nHow To Talk With Your Doctor . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5')

# Embedding creation


In [None]:
import os
from getpass import getpass

# Never hardcode an API token in a notebook: it ends up in version control and in
# every shared copy. Any token that was previously committed here must be revoked.
# The token is taken from the environment when already set; otherwise it is asked
# for interactively without echoing it.
HF_TOKEN_ENV = "HUGGINGFACEHUB_API_TOKEN"
if not os.environ.get(HF_TOKEN_ENV):
    try:
        hf_token = getpass("Hugging Face API token (leave blank to skip): ").strip()
    except (EOFError, NotImplementedError):
        # No interactive stdin (e.g. headless "Run All"): continue unauthenticated.
        hf_token = ""
    if hf_token:
        os.environ[HF_TOKEN_ENV] = hf_token


In [None]:
# Sentence-embedding model used to vectorise both the chunks and the queries.
EMBEDDING_MODEL_NAME = "NeuML/pubmedbert-base-embeddings"
embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/6.12k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/667 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.30k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/706k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# Vector store creation

In [None]:
# Embed every chunk and index the vectors in a Chroma collection.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [None]:
# Sample question used to sanity-check retrieval before wiring up the LLM.
query = "What are the major risk factors of heart disease?"

# Look up the chunks whose embeddings are closest to the question.
search_results = vectorstore.similarity_search(query)

In [None]:
# Show the chunks returned by the similarity search for the sample query.
search_results

[Document(metadata={'page': 46, 'source': '/content/drive/MyDrive/BioMistral/Data/healthyheart.pdf'}, page_content='heart disease, including inflammation of the artery walls. Several\nemerging risk factors have been identified. We don’t know for\nsure yet whether they lead to heart disease or whether treating\nthem will reduce risk. While these possible risk factors are not'),
 Document(metadata={'page': 6, 'source': '/content/drive/MyDrive/BioMistral/Data/healthyheart.pdf'}, page_content='at least one risk factor for heart disease.\nEvery risk factor counts. Research shows that each individual risk\nfactor greatly increases the chances of developing heart disease.\nMoreover, the worse a particular risk factor is, the more likely you'),
 Document(metadata={'page': 46, 'source': '/content/drive/MyDrive/BioMistral/Data/healthyheart.pdf'}, page_content='42Your Guide to a Healthy Heart\nNew Risk Factors?\nWe know that major risk factors such as high blood cholesterol,\nhigh blood pressure,

In [None]:
# Wrap the vector store as a retriever that returns the top-k chunks per query.
TOP_K = 5
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})

In [None]:
# Retrieve the chunks for the sample query. `invoke` is the supported Runnable entry
# point; `get_relevant_documents` is deprecated and emits a deprecation warning.
retriever.invoke(query)

  retriever.get_relevant_documents(query)


[Document(metadata={'page': 46, 'source': '/content/drive/MyDrive/BioMistral/Data/healthyheart.pdf'}, page_content='heart disease, including inflammation of the artery walls. Several\nemerging risk factors have been identified. We don’t know for\nsure yet whether they lead to heart disease or whether treating\nthem will reduce risk. While these possible risk factors are not'),
 Document(metadata={'page': 6, 'source': '/content/drive/MyDrive/BioMistral/Data/healthyheart.pdf'}, page_content='at least one risk factor for heart disease.\nEvery risk factor counts. Research shows that each individual risk\nfactor greatly increases the chances of developing heart disease.\nMoreover, the worse a particular risk factor is, the more likely you'),
 Document(metadata={'page': 46, 'source': '/content/drive/MyDrive/BioMistral/Data/healthyheart.pdf'}, page_content='42Your Guide to a Healthy Heart\nNew Risk Factors?\nWe know that major risk factors such as high blood cholesterol,\nhigh blood pressure,

# LLM model loading

In [None]:
# Load the quantised BioMistral-7B GGUF model through llama.cpp.
llm = LlamaCpp(
    model_path="/content/drive/MyDrive/BioMistral/BioMistral-7B.Q4_K_M.gguf",
    temperature=0.2,  # low temperature: favour focused answers over variety
    max_tokens=2048,
    top_p=1,
    # Set the context window explicitly. LangChain's LlamaCpp wrapper otherwise
    # falls back to a small default (512 tokens), which is below max_tokens and too
    # tight for a prompt that carries retrieved chunks plus the question.
    # The model metadata reports a trained context length of 32768.
    n_ctx=4096,
)

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /content/drive/MyDrive/BioMistral/BioMistral-7B.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = hub
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.att

## Use the LLM, the retriever, and the query to generate the final response

In [None]:
# Prompt for the RAG chain. It must contain both placeholders the chain supplies:
# {context} (the retrieved passages) and {query} (the user's question). Without
# {context} the model never sees the retrieved text and answers from its weights only.
# The filled-in prompt has to fit inside the LLM's context window (n_ctx).
template = """
<|context|>
You are an Medical Assistant that follows the instruction and generate the accurate response based on the query and the context provided.
Please be truthful and give direct answers.
Context:
{context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

In [None]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate

In [None]:
# Build the prompt object from the template string; its {placeholders} become input variables.
prompt = ChatPromptTemplate.from_template(template)

In [None]:
def format_docs(documents):
    """Join the text of retrieved documents into one plain-text context string.

    Passing the retriever's raw output would hand the prompt a list of Document
    objects, which is interpolated as its Python repr (metadata included).
    """
    return "\n\n".join(doc.page_content for doc in documents)


# question -> {context: retrieved text, query: question} -> prompt -> LLM -> str
rag_chain = (
    {"context": retriever | format_docs, "query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Pass the `query` variable defined earlier; the string literal "query" would ask
# the model about the word "query" itself.
response = rag_chain.invoke(query)

llama_perf_context_print:        load time =   34027.36 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    66 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    18 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   47732.62 ms /    84 tokens


In [None]:
# Show the answer string returned by the chain.
response

'I am sorry, I did not understand your question. Can you please rephrase it?'

In [None]:
import sys

# Minimal console chat loop: type a question, or "exit" to stop.
while True:
    # Strip so that whitespace-only input is skipped and "exit " still exits.
    user_input = input("Input query: ").strip()
    if user_input == "exit":
        print("Exiting...")
        # Leave the loop with break: calling sys.exit() inside a notebook raises
        # SystemExit, which is reported as an error in the cell output.
        break
    if not user_input:
        continue
    result = rag_chain.invoke(user_input)
    print("Answer: ", result)


Input query: exit
Exiting...


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
!pip install gradio
import gradio as gr


def chatbot_ui(user_query):
    """Answer one question from the Gradio text box using the RAG chain.

    Args:
        user_query: Text entered by the user; may be None or blank.

    Returns:
        The chain's answer; a request to enter a query when the input is
        missing or whitespace-only; or an "Error: ..." string if the chain raises.
    """
    # Check for None before calling .strip(): that call sits outside the try block,
    # so a None value would otherwise escape as an unhandled AttributeError.
    if user_query is None or not user_query.strip():
        return "Please enter a valid query."
    try:
        return rag_chain.invoke(user_query)
    except Exception as e:
        # Best effort: report the failure in the response box instead of crashing the UI.
        return f"Error: {e}"


# Extra CSS handed to gr.Interface below: hides the page footer and the flag button.
# NOTE(review): the footer rule also sets text-align/padding/font-size, which have no
# visible effect while display is none — presumably leftovers; confirm before removing.
custom_css = """
footer {
    display: none !important;
    text-align: center;
    padding: 10px;
    font-size: 14px;
}

.flag-btn, .gradio-footer {
    display: none !important;
} /* Hide the flag button and Gradio footer */
"""


# Input and output widgets of the chat form.
query_box = gr.Textbox(
    label="Enter your medical query:",
    placeholder="Ask a medical question here...",
    lines=3,
)
answer_box = gr.Textbox(label="Chatbot Response", lines=3)

# Sample questions offered as examples in the UI.
example_queries = [
    ["What are the symptoms of diabetes?"],
    ["Explain the risk factors of heart disease."],
    ["How can I reduce cholesterol levels naturally?"],
]

# Assemble the single-function web UI around chatbot_ui.
interface = gr.Interface(
    fn=chatbot_ui,
    inputs=query_box,
    outputs=answer_box,
    title="WeCare",
    description="<h2 style='text-align:center;'>Your Trusted Virtual Health Companion – Providing Answers, Support, and Care Anytime.</h2>",
    examples=example_queries,
    css=custom_css,
)


# Start the Gradio app. share=True requests a temporary public gradio.live URL, so
# anyone holding the link can query the chatbot while the session is running.
interface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://75cbc45da1eae2fee4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


