<a href="https://colab.research.google.com/github/adnan417/Medical_Chatbot/blob/main/Medical_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install required packages

In [None]:
!pip install langchain langchain-community langchain-huggingface sentence-transformers faiss-cpu transformers pypdf gradio

Load the PDF and split it into chunks

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Source document and chunking parameters, named so they are easy to find and tune.
PDF_PATH = "Medical_book.pdf"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 150

# Load the PDF into LangChain documents.
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

# Split the loaded documents into overlapping chunks for embedding.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = splitter.split_documents(docs)


Embed the chunks and store them in a FAISS vector database, held in the variable `vector_db`

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.vectorstores import FAISS

# Sentence-transformers model used to embed every chunk.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Build the FAISS vector store from the chunks using that embedding model.
vector_db = FAISS.from_documents(documents=chunks, embedding=embedding)


Save the FAISS index to disk

In [None]:
# Folder (relative to the working directory) that receives the saved vector store.
INDEX_DIR = "faiss_index"
vector_db.save_local(INDEX_DIR)

Set up the LLM

In [None]:
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline

# Model and generation length used for answering.
LLM_MODEL_NAME = "google/flan-t5-base"
MAX_LENGTH = 256

# Hugging Face text2text-generation pipeline that produces the answers.
generator = pipeline(
    task="text2text-generation",
    model=LLM_MODEL_NAME,
    max_length=MAX_LENGTH,
)

# Wrap the pipeline so LangChain can use it as an LLM.
llm = HuggingFacePipeline(pipeline=generator)


Create prompt template

In [None]:
from langchain.prompts import PromptTemplate

# Prompt text: instructs the model to answer only from the supplied context and
# to return a fixed fallback sentence when the context lacks the answer.
# NOTE(review): "dont" in the fallback sentence is a typo; it is left unchanged
# here because the string is part of the prompt sent to the model at runtime.
prompt_template = """
You are a knowledgeable medical assistant.
Use only the information from the provided context to answer the user's question.
If the answer is not found in the context, say "I dont know. The question is out of context"
Do NOT use any external knowledge or make up information.

Context:
{context}

Question: {question}

Answer:
"""

# The template exposes exactly two placeholders: {context} and {question}.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)


Set up the retrieval chain

In [None]:
from langchain.chains import RetrievalQA

# Number of chunks fetched from the vector store for each question.
TOP_K = 3
retriever = vector_db.as_retriever(search_kwargs={"k": TOP_K})

# Retrieval QA chain of type "stuff", wired to the custom prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
)


Query

In [None]:
# Example question to send through the retrieval chain.
query = "What is the cure for diabetes?"

# Run the chain; the generated answer is stored under the 'result' key.
response = qa_chain.invoke(query)
answer = response['result']
print(answer)
