<a href="https://colab.research.google.com/github/atulX7/Medchatbot/blob/main/Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install langchain torch accelerate bitsandbytes transformers sentence_transformers faiss_cpu chainlit huggingface_hub ctransformers





In [11]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer
import chainlit as cl

# Directory scanned for source PDF files when the vector store is built.
DATA_PATH = "data/"
# Location where the FAISS index is saved and later loaded from.
DB_FAISS_PATH = "vectorstore/db_faiss"


In [12]:
def create_vector_db():
    """Build a FAISS index from the PDFs in DATA_PATH and save it to DB_FAISS_PATH.

    Loads every file matching ``*.pdf`` in DATA_PATH, splits the loaded
    documents with chunk_size=500 and chunk_overlap=50, embeds the pieces with
    the all-MiniLM-L6-v2 sentence-transformers model on CPU, and persists the
    resulting index locally. Returns nothing.
    """
    loaded_docs = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader).load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(loaded_docs)

    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )
    FAISS.from_documents(chunks, embedder).save_local(DB_FAISS_PATH)

# Build and persist the vector store whenever this code runs with
# __name__ set to "__main__".
if __name__ == "__main__":
    create_vector_db()


In [13]:
# Prompt text handed to PromptTemplate in set_custom_prompt(). It has two
# placeholders, {context} and {question}, and instructs the model to return
# only the answer.
custom_prompt_template = """
Context: {context}
Question: {question}
Only return the helpful answer below and nothing else.
Helpful answer:
"""

def set_custom_prompt():
    """Return a PromptTemplate built from ``custom_prompt_template``.

    The template declares the two input variables ``context`` and ``question``.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )

def retrieval_qa_chain(llm, prompt, db):
    """Assemble a RetrievalQA chain of type 'stuff' over the given vector store.

    Args:
        llm: Language model passed through to the chain.
        prompt: Prompt forwarded to the chain via ``chain_type_kwargs``.
        db: Vector store; its retriever is created with ``search_kwargs={'k': 2}``.

    Returns:
        The RetrievalQA chain, configured to also return source documents.
    """
    top_k_retriever = db.as_retriever(search_kwargs={'k': 2})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=top_k_retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )


In [14]:
def load_llm():
    """Create the CTransformers LLM for the Llama-2 7B chat GGML model.

    Returns:
        A CTransformers instance for "TheBloke/Llama-2-7B-Chat-GGML" with
        max_new_tokens=512 and temperature=0.5.
    """
    # Generation settings have to be supplied through `config`: the LangChain
    # CTransformers wrapper forwards that dict to ctransformers, whereas
    # top-level keyword arguments such as max_new_tokens / temperature are not
    # fields of the wrapper and do not reach the underlying model.
    generation_config = {'max_new_tokens': 512, 'temperature': 0.5}
    llm = CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_type="llama",
        config=generation_config,
    )
    return llm

def qa_bot():
    """Build the question-answering chain from the saved FAISS index.

    Loads the index stored at DB_FAISS_PATH using the all-MiniLM-L6-v2
    embeddings on CPU, then combines it with the LLM from ``load_llm()`` and
    the prompt from ``set_custom_prompt()`` via ``retrieval_qa_chain``.

    Returns:
        The chain produced by ``retrieval_qa_chain``.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    vector_store = FAISS.load_local(DB_FAISS_PATH, embedder)
    return retrieval_qa_chain(load_llm(), set_custom_prompt(), vector_store)


In [15]:
def prepare_query(query, max_length=512):
    """Truncate ``query`` to at most ``max_length`` tokens and return it as text.

    The query is encoded with the all-MiniLM-L6-v2 tokenizer (truncation
    enabled) and the resulting token ids are decoded back to a string with
    special tokens skipped.

    Note: the result is the decoded token sequence, not the original string.
    In this notebook's recorded output the query "Empowerment" comes back as
    'empowerment'.

    Args:
        query: The user's question text.
        max_length: Maximum number of tokens kept by the tokenizer.

    Returns:
        The decoded, possibly truncated query string.
    """
    tok = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    encoded = tok(query, truncation=True, max_length=max_length, return_tensors="pt")
    # A single query was encoded, so take the first (only) row of ids.
    first_sequence_ids = encoded["input_ids"][0]
    return tok.decode(first_sequence_ids, skip_special_tokens=True)


In [17]:
def final_result(query):
    """Answer ``query`` with the retrieval QA chain and return the raw response.

    The chain is built by ``qa_bot()`` on the first call and reused afterwards:
    building it loads the embedding model, the FAISS index and the LLM, which
    is far too expensive to repeat for every question. Re-running the cell that
    defines this function creates a fresh function object and therefore
    discards the cached chain (e.g. after the vector store was rebuilt).

    Args:
        query: The user's question text.

    Returns:
        The chain's response for ``{'query': <prepared query>}``; the recorded
        outputs in this notebook show a dict with 'query', 'result' and
        'source_documents' entries.
    """
    prepared_query = prepare_query(query)

    # Cache the chain on the function object so no extra module-level state
    # or imports are needed.
    chain = getattr(final_result, "_qa_chain", None)
    if chain is None:
        chain = qa_bot()
        final_result._qa_chain = chain

    response = chain({'query': prepared_query})
    return response




Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

{'query': 'widening reach', 'result': "The Bank has registered a Y-o-Y growth of 19.9% in terms of customer base during the last year, indicating an increase in demand for direct finance from customers. This growth is likely due to the efforts of MSMEs to diversify their revenue streams and expand their businesses during times of uncertainty, such as the COVID-19 pandemic.\nThe Bank's reach has increased to 7,910 customers as of March 2021, up from 6,595 as of March 2020, indicating a significant increase in demand for direct finance from customers. This growth is likely due to the efforts of MSMEs to diversify their revenue streams and expand their businesses during times of uncertainty, such as the COVID-19 pandemic.\nThe Bank has recorded 66% growth in customer base during the last three years, indicating a significant increase in demand for direct finance from customers. This growth is likely due to the efforts of MSMEs to diversify their revenue streams and expand their businesses

In [18]:
# Example usage: run a single query end-to-end and print the raw response
# returned by final_result.
print(final_result("Empowerment"))

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

{'query': 'empowerment', 'result': '1. The programme LEAP is aimed at supporting sea buckthorn-based livelihoods in Leh.\n2. The programme aims to create 15 enterprises, covering 75 entrepreneurs and impacting 750 families.\n3. The programme suppports Samridhi Fund and its investee companies operating in the social impact space.\n4. At the fund level, suppor t is for strengthening ESG and showcasing the impact created by the fund.', 'source_documents': [Document(page_content='Programme – LEAP - in Leh\n• Swavalamban LEAP , to be launched shor tly, aims to suppor t Sea \n Buckthorn-based livelihood in Leh\n• Aims to create 15 enterprises, covering 75 entrepreneurs and \n impacting 750 families\nSocial  Enterprises  &  Impac t  Investment\n• Aims to provide suppor t to Samridhi Fund and its investee companies \n oper ating in the social impact space. \n• At the Fund level, suppor t is for strengthening ESG and showcasing \n the  impact  created  by  the  fund.', metadata={'source': 'data