In [6]:
import os
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain_community.vectorstores import Pinecone as LangchainPinecone
from dotenv import load_dotenv
import pinecone
from pinecone import Pinecone, ServerlessSpec
import time

# Load variables from a .env file into the process environment; the Pinecone
# API key is read from the environment later via os.getenv.
load_dotenv()

True

In [7]:
def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: path of the directory handed to ``DirectoryLoader``.

    Returns:
        The documents returned by the loader for every file matching the
        ``*.pdf`` glob, each file being read with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [8]:
# Directory holding the source PDFs. Set MEDICAL_CHATBOT_DATA_DIR (for example
# in the .env file) to run the notebook on another machine; when it is unset
# the original machine-specific path is used unchanged.
data_path=os.getenv("MEDICAL_CHATBOT_DATA_DIR", r'C:\Users\MSI TM\MedicalChatbot\data/')
extracted_data=load_pdf(data_path)

In [26]:
chunk_size = 500
chunk_overlap = 20


def text_splitter(extracted_data):
    """Split the loaded documents into overlapping text chunks.

    The splitter is configured from the module-level ``chunk_size`` and
    ``chunk_overlap`` values, which are read when the function is called.

    Args:
        extracted_data: the documents to split.

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(extracted_data)


In [27]:
# Split the loaded PDF documents into chunks; these are what gets embedded
# and uploaded to the vector store later on.
text_chunks=text_splitter(extracted_data)

In [28]:
# Sanity check: number of chunks the splitter produced.
len(text_chunks)

7020

In [29]:
def download_huggingface_embeddings_mode(model_name):
    """Create the Hugging Face embedding model identified by ``model_name``.

    Args:
        model_name: model identifier passed straight to
            ``HuggingFaceEmbeddings``.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [30]:
# Instantiate the MiniLM sentence-transformers model; the same object is used
# to embed the document chunks and the queries.
embeddings=download_huggingface_embeddings_mode("sentence-transformers/all-MiniLM-L6-v2")

In [31]:
# Display the embedding model's configuration.
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [32]:
# Embed a throwaway query to check the length of the embedding vector; it
# should equal the dimension of the Pinecone index the vectors are stored in.
query="Hello World"
len(embeddings.embed_query(query))


384

In [34]:

# Connect to Pinecone with the key taken from the environment (.env file).
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    # Fail early with an actionable message rather than a later client error.
    raise RuntimeError(
        "PINECONE_API_KEY is not set; add it to the environment or the .env file."
    )
pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = 'medical-chatbot'

# Poll until the index reports ready, but give up after a bounded time so the
# cell cannot spin forever if the index never becomes ready.
READY_TIMEOUT_SECONDS = 120
ready_deadline = time.monotonic() + READY_TIMEOUT_SECONDS
while not pc.describe_index(index_name).status['ready']:
    if time.monotonic() >= ready_deadline:
        raise TimeoutError(
            f"Pinecone index {index_name!r} was not ready after "
            f"{READY_TIMEOUT_SECONDS} seconds."
        )
    time.sleep(1)

# Only take a handle on the index once it is known to be ready.
index = pc.Index(index_name)

# Print the index statistics (dimension, fullness, namespaces, vector count).
stats = index.describe_index_stats()
print(stats)

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}


In [35]:
# Embed the chunks and upload them to the Pinecone index together with the
# metadata the loader attached to each chunk (for PDF loaders this is
# presumably the source file and page — TODO confirm), so that retrieved
# documents can be traced back to where they came from.
# Only scalar metadata values are forwarded; this assumes Pinecone rejects
# nulls and nested objects — TODO confirm against the Pinecone metadata docs.
# NOTE(review): re-running this cell uploads every chunk again; check whether
# that duplicates vectors in the index before re-executing it.
doc_search=LangchainPinecone.from_texts(
    [t.page_content for t in text_chunks],
    embeddings,
    metadatas=[
        {
            key: value
            for key, value in t.metadata.items()
            if isinstance(value, (str, int, float, bool))
        }
        for t in text_chunks
    ],
    index_name=index_name
)

In [36]:
# Show the vector-store handle returned by the upload.
doc_search

<langchain_community.vectorstores.pinecone.Pinecone at 0x2051e9eb290>

In [37]:

# Open the index by name with the same embedding model; no texts are passed
# in this call.
docs_search = LangchainPinecone.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

query="what is Allergies?"

# Search through the handle opened just above, so this cell also works when
# the upload cell has not been run in the current session.
docs=docs_search.similarity_search(query, k=3)

# Show the best match; guard against an empty result so a miss does not
# raise IndexError.
if docs:
    print(docs[0].page_content)
else:
    print("No matching documents found.")


GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies
Allergic rhinitis is commonly triggered by
exposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.
The presence of an allergen causes the
body's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.
IgE molecules attach to mast
cells, which contain histamine.HistaminePollen grains
Lymphocyte
FIRST EXPOSURE


In [38]:
# Prompt text for the QA chain. {context} and {question} are placeholders that
# are declared as the template's input variables when the PromptTemplate is
# built. The wording tells the model to say it does not know rather than
# invent an answer.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't have the answer, just say that you don't know, don't try to make up answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [39]:
# Wrap the raw template text, declaring its two placeholders as inputs.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Keyword arguments handed to the chain constructor so it uses this prompt.
chain_type_kwargs = dict(prompt=prompt)

In [40]:
# Location of the local Llama-2 chat model file. Set MEDICAL_CHATBOT_MODEL_PATH
# (for example in the .env file) to run the notebook on another machine; when
# it is unset the original machine-specific path is used unchanged.
model_path = os.getenv(
    "MEDICAL_CHATBOT_MODEL_PATH",
    r"C:\Users\MSI TM\MedicalChatbot\model\llama-2-7b-chat.ggmlv3.q4_0.bin",
)

# Load the model through CTransformers with the generation settings below
# (at most 512 new tokens per answer, sampling temperature 0.8).
llm=CTransformers(
    model=model_path,
    model_type="llama",
    config={'max_new_tokens':512,
            'temperature':0.8
            }
)

In [41]:
# Assemble the retrieval QA chain: the vector store is exposed as a retriever
# configured with k=2, the "stuff" chain type is used together with the custom
# prompt, and source documents are returned alongside the answer.
QA = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=doc_search.as_retriever(search_kwargs={"k": 2}),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [1]:
# Interactive loop: read a question, run it through the QA chain and print the
# answer, until the user types 'exit'.
while True:
    try:
        # Strip surrounding whitespace so ' exit ' and padded questions behave.
        user_input = input("Input Prompt (type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # A closed input stream or an interrupt at the prompt ends the session
        # cleanly instead of leaving a traceback in the notebook.
        print("Exiting the program.")
        break
    if not user_input:
        # Do not send an empty query to the model; just prompt again.
        continue
    if user_input.lower() == "exit":
        print("Exiting the program.")
        break
    result = QA({"query": user_input})
    print("Response:", result["result"])


Exiting the program.
