In [1]:
#importing libraries
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
import os
import uuid
import pinecone


  from tqdm.autonotebook import tqdm


In [2]:
#extract data from pdf

loader = DirectoryLoader(
    path='C:\End to End Projects\Medical Chatbot Gen AI\data',
    glob='*.pdf',
    loader_cls=PyPDFLoader
    )
documents = loader.load()

In [3]:
#creating text chunks

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)

text_chunks = text_splitter.split_documents(documents)

In [4]:
len(text_chunks)

7020

In [5]:
##download embedding model
def download_huggingface_embeddings():
    
    embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-MiniLM-L6-v2')
    
    return embeddings

In [6]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [7]:
query = embeddings.embed_query('Hello')
print(len(query))

384


In [9]:
#initializing pinecone
pinecone.init(api_key=PINECONE_API_KEY,
              environment=PINECONE_API_ENV)


In [10]:
index_name="medical-chatbot"

In [11]:
#Creating Embeddings for Each of The Text Chunks & storing
docsearch = Pinecone.from_texts([t.page_content for t in text_chunks],
                                embedding,
                                index_name=index_name)

In [11]:
#If we already have an index we can load it like this
docsearch = Pinecone.from_existing_index(index_name,embedding)

query = "What are Allergies"
docs = docsearch.similarity_search(query,k=3)

print(docs)

[Document(page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE", metadata={}), Document(page_content='allergens are the following:\n• plant pollens\n• animal fur and dander\n• body parts from house mites (microscopic creatures\nfound in all houses)\n• house dust• mold spores• cigarette smoke• solvents• cleaners\nCommon food allergens include the following:\n• nuts, especially peanuts, walnuts, and brazil nuts\n• fish, mollusks, and shellfish• eggs• wheat• milk• food additives and preservatives\nThe following types of drugs commo

In [12]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [13]:
PROMPT = PromptTemplate(template=prompt_template,input_variables=['context','question'])
chain_type_kwargs = {'prompt':PROMPT}

In [14]:
llm=CTransformers(model="C:\End to End Projects\Medical Chatbot Gen AI\model\llama-2-7b-chat.ggmlv3.q2_K.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

In [15]:
qa = RetrievalQA.from_chain_type(llm=llm,
                            chain_type='stuff',
                            retriever=docsearch.as_retriever(search_kwargs={'k':2}),
                            return_source_documents = True,
                            chain_type_kwargs = chain_type_kwargs)
                                           

In [16]:
user_input = input(f'Input Prompt:')
result = qa({'query':user_input})
print(result)

{'query': 'what is throid?', 'result': 'Thyroid - (noun) a gland located in the neck that produces hormones that regulate metabolism, growth, and energy.', 'source_documents': [Document(page_content='terone deficiency, steroid therapy, high-sodium diets ,\ncertain antihypertensive therapies, and Addison’s dis-\nease (an autoimmune disorder).\nResources\nBOOKS\nJacobs, David S., et al. Laboratory Test Handbook. 4th ed. New\nYork: Lexi-Comp Inc., 1996.\nPagana, Kathleen Deska. Mosby’s Manual of Diagnostic and\nLaboratory Tests. St. Louis: Mosby, Inc., 1998.\nJanis O. Flores\nGALE ENCYCLOPEDIA OF MEDICINE 2 101Aldosterone assay\nKEY TERMS\nAldosteronism —A condition in which the adrenal', metadata={}), Document(page_content='terone deficiency, steroid therapy, high-sodium diets ,\ncertain antihypertensive therapies, and Addison’s dis-\nease (an autoimmune disorder).\nResources\nBOOKS\nJacobs, David S., et al. Laboratory Test Handbook. 4th ed. New\nYork: Lexi-Comp Inc., 1996.\nPagana, Kath

In [None]:
user_input = input(f'Input Prompt:')
result = qa({'query':user_input})
print(result)

{'query': 'what is thyroid?', 'result': "Thyroid is a gland located in the neck, just above the Adam's apple. It produces hormones that regulate metabolism, growth, and development. Thyroid hormones control how quickly cells use energy from nutrients and oxygen to perform their functions.", 'source_documents': [Document(page_content='binds to cells in the thyroid gland. Unlike in Grave’sdisease, however, this antibody’s action results in lessthyroid hormone being made.\n• Pemphigus vulgaris. A group of autoimmune disorders\nthat affect the skin.\n•Myasthenia gravis . A condition in which the immune\nsystem attacks a receptor on the surface of muscle cells,preventing the muscle from receiving nerve impulsesand resulting in severe muscle weakness.\n•Scleroderma . Also called CREST syndrome or pro-', metadata={}), Document(page_content='deposited in the membranes of both the lung and kid-neys, causing both inflammation of kidney glomerulus(glomerulonephritis ) and lung bleeding. It is typ

In [17]:
user_input = input(f'Input Prompt:')
result = qa({'query':user_input})
print(result)

{'query': 'what is symptoms of malaria?', 'result': 'The symptoms of malaria include fever, chills, headache, muscle and joint pain, fatigue, nausea, vomiting, diarrhea, abdominal pain, rapid breathing, and wheezing or difficulty breathing in adults. In children, the symptoms may also include fever, vomiting, diarrhea, abdominal pain, and difficulty breathing.', 'source_documents': [Document(page_content='• painful, bluish or purplish fingers or toes\n• puffy or swollen eyelids, face, feet, or lower legs\n• changes in the color of the face• skin rash, itching , or hives\n• yellow eyes or skin\n• severe or continuing headache\n• sore throat and fever, with or without chills• breathing problems or wheezing\n• tightness in the chest\n• dizziness\n• unusual tiredness or weakness• weight gain\nIn addition, patients taking anticoagulant drugs', metadata={}), Document(page_content='that occurs in tropical, subtropical, and some temperateregions of the world. The disease is caused by a parasit

In [18]:
user_input = input(f'Input Prompt:')
result = qa({'query':user_input})
print(result)

{'query': 'what is severe fever?', 'result': 'Severe fever refers to a high temperature, typically above 103°F (39.4°C), that is accompanied by symptoms such as headache, muscle aches, fatigue, and discomfort.', 'source_documents': [Document(page_content='his or her doctor immediately or go to an emergencyroom. Acute lymphangitis could be diagnosed by thefamily doctor, infectious disease specialist, or an emer-gency room doctor. The painful, red streaks just belowthe skin surface and the high fever are diagnostic of acutelymphangitis. A sample of blood would be taken for cul-ture to determine whether the bacteria have entered thebloodstream. A biopsy (removal of a piece of infectedtissue) sample may be taken for culture to identify whichKEY', metadata={}), Document(page_content='his or her doctor immediately or go to an emergencyroom. Acute lymphangitis could be diagnosed by thefamily doctor, infectious disease specialist, or an emer-gency room doctor. The painful, red streaks just bel