In [3]:
from langchain import PromptTemplate
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings ## For Converting data to Embeddings
from langchain.embeddings.vertexai import VertexAIEmbeddings ## For Converting data to Embeddings with VertexAI
from langchain.embeddings.openai import OpenAIEmbeddings ## For Converting data to Embeddings with OpenAI
from langchain.vectorstores import Pinecone # For Vector DB
from langchain.vectorstores.pinecone import Pinecone # For Vector DB
import pinecone # For Vector DB
from langchain.document_loaders import PyPDFLoader, DirectoryLoader # For loading the data
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter # For splitting the data in chunks
from langchain.llms import OpenAI, VertexAI, CTransformers # For reasoning logic
from tqdm.autonotebook import tqdm
from sentence_transformers import SentenceTransformer


In [4]:
# Load environment variables via python-dotenv before any cell reads os.environ
# (a later cell reads PINECONE_API_KEY from it).
# NOTE(review): presumably sourced from a local .env file -- confirm it exists
# and is kept out of version control.
from dotenv import load_dotenv
load_dotenv()

True

In [5]:
def load_pdf(data):
    """Load the PDF files of a directory into documents.

    Args:
        data: Directory path handed to ``DirectoryLoader``. Files matching the
            glob ``*.pdf`` are read with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for the matched files.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()


In [6]:
# Raw string: the Windows path is full of backslashes (\G, \S, \D, \d) that are
# invalid escape sequences in a regular literal and emit a warning on newer
# Python versions. The resulting path value is unchanged.
doc = load_pdf(r"E:\Girish Documents\Study\Data Science\DataScience_GenAI_Study\GenAI_Project_Medical-Chatbot-Using-LLAMA2\data")
#doc

In [7]:
# Keep only the loaded pages whose text is non-empty after stripping whitespace.
extracted_data = [
    page
    for page in doc
    if page.page_content.strip()
]
#extracted_data

In [8]:
#Create text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 500,
            the value this notebook originally hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            20, the value this notebook originally hard-coded.

    Returns:
        The chunks returned by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

In [9]:
# Chunk the filtered pages; the trailing expression makes the notebook display
# how many chunks were produced.
text_chunks = text_split(extracted_data)
len(text_chunks)

7020

In [10]:
#download embedding model
from sentence_transformers import SentenceTransformer

def download_hugging_face_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the HuggingFace embedding model used for indexing and querying.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to the model this notebook originally hard-coded.
            NOTE(review): the vector index must match the chosen model's
            output dimension -- confirm before switching models.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [11]:
# Build the embedding model once; later cells pass this object to the vector
# store for both indexing and querying.
embeddings = download_hugging_face_embeddings()

In [12]:
# Sanity check: embed a short string and report the length of the vector.
query_result = embeddings.embed_query("Hello World!")
vector_length = len(query_result)
print("Length:", vector_length)

Length: 384


In [13]:
## Vector Search DB In Pinecone
import os

# Import the client class under an alias. Binding it as `Pinecone` would shadow
# the LangChain vector store of the same name imported at the top of the
# notebook, and `Pinecone.from_existing_index(...)` would then only work if the
# upload cell had re-imported the LangChain class first.
from pinecone import Pinecone as PineconeClient

# The API key is read from the environment; a missing key raises KeyError here.
pc = PineconeClient(
    api_key=os.environ["PINECONE_API_KEY"],
    environment="gcp-starter"
)
index_name = "medical-bot"

In [14]:
import hashlib

from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Pinecone
from langchain.vectorstores.pinecone import Pinecone

# Derive a deterministic id for every chunk from its position and text so that
# re-running this cell overwrites the same vectors instead of inserting the
# whole corpus again (the recorded similarity-search output already shows the
# same chunk returned twice).
# NOTE(review): vectors written by earlier runs under other ids are not removed
# by this change -- clear the index once if duplicates must go away.
chunk_ids = [
    hashlib.sha256(f"{position}:{chunk.page_content}".encode("utf-8")).hexdigest()
    for position, chunk in enumerate(text_chunks)
]

docsearch = Pinecone.from_documents(
    text_chunks,
    embeddings,
    index_name=index_name,
    ids=chunk_ids,
)

In [15]:
# Attach to the index by name and run a sample similarity search against it.
docsearch = Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

query = "What are Allergies?"
docs = docsearch.similarity_search(query, k=3)  # three closest chunks
print("Result: ", docs)

Result:  [Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-', metadata={'page': 135.0, 'source': 'E:\\Girish Documents\\Study\\Data Science\\DataScience_GenAI_Study\\GenAI_Project_Medical-Chatbot-Using-LLAMA2\\data\\Medical_book.pdf'}), Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-', metadata={'page': 135.0, 'source': 'E:\\Girish Documents\\Study\\Data Science\\DataScience_GenAI_Study\\GenAI_Project_Medical-Chatbot-Using-LLAMA2\\data\\Medical_book.pdf'}), Document(page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The

In [16]:
# Prompt text with two placeholders, {context} and {question}. Built from
# adjacent literals, one per line of the prompt; the leading and trailing
# newlines are part of the template.
prompt_template = (
    "\n"
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

In [17]:
# Wrap the raw template text and declare the two variables it expects, then
# package it in the keyword dict passed on to the QA chain.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [18]:
# Prerequisite: run `huggingface-cli login` in a terminal or conda prompt before continuing.

In [19]:
# Local quantised LLaMA-2 chat model loaded through CTransformers.
# Raw string: the Windows path contains backslashes (\G, \S, \D, \l) that are
# invalid escape sequences in a regular literal and emit a warning on newer
# Python versions. The resulting path value is unchanged.
llm = CTransformers(
    model=r"E:\Girish Documents\Study\Data Science\llama-2-7b-chat.ggmlv3.q8_0.bin",
    model_type="llama",
    config={'max_new_tokens': 512,
            'temperature': 0.8},
)

In [20]:
# Retriever over the vector store that fetches the two closest chunks per query.
retriever = docsearch.as_retriever(search_kwargs={'k': 2})

# "stuff" chain: built with the custom prompt and asked to return the source
# documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

In [None]:
# Interactive question loop. Type "exit" or "quit" to stop; interrupting the
# kernel or sending EOF while waiting for input also ends the loop cleanly
# instead of leaving a traceback in the notebook.
while True:
    try:
        user_input = input('Input Prompt:')
    except (EOFError, KeyboardInterrupt):
        break

    command = user_input.strip()
    if not command:
        # Nothing typed: ask again rather than sending an empty query to the LLM.
        continue
    if command.lower() in {"exit", "quit"}:
        break

    result = qa({"query": user_input})
    print("Response: ", result["result"])