In [1]:
#Importing Ollama: Llama2(7B) model
from langchain_community.llms import Ollama


#Importing Prompt Templates
from langchain.prompts import PromptTemplate
from langchain import PromptTemplate

#Importing Vector Databases
import pinecone
from langchain_pinecone import PineconeVectorStore as Pinecone

#Importing Embeddings Models
from langchain_community.embeddings import HuggingFaceEmbeddings

#Import QA chain
from langchain.chains import RetrievalQA

#Import document loaders
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader

#Import recursive text splitter for chunking documents
from langchain.text_splitter import RecursiveCharacterTextSplitter

#Packages to load enviourmennt variables
from dotenv import load_dotenv
import os


  from tqdm.autonotebook import tqdm


In [2]:
#Initialize the environment variables
load_dotenv()

True

In [3]:
#Initialize the Ollama model
llm= Ollama(
    model= "llama2:7b",
    temperature=0.4,
    

)

In [4]:
#Testing the llm
print(llm.invoke("What is the capital of France?"))

Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Unauthorized"}\n')trace=d4b4477e-4d67-44e2-b70f-8e4df7042d03,id=d4b4477e-4d67-44e2-b70f-8e4df7042d03



The capital of France is Paris.


Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Unauthorized"}\n')trace=d4b4477e-4d67-44e2-b70f-8e4df7042d03,id=d4b4477e-4d67-44e2-b70f-8e4df7042d03
Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Unauthorized"}\n')trace=c5479c01-f07c-4c96-82fb-c501b62bea7c,id=c5479c01-f07c-4c96-82fb-c501b62bea7c; trace=c5479c01-f07c-4c96-82fb-c501b62bea7c,id=bd6fabcb-f72b-4727-85ca-c5094753a9ac
Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.

In [5]:
#Loading the data from the data directory
#Reading it from Medical_book.pdf
def data_loader(Data):
    loader= DirectoryLoader(
        Data,
        glob="*.pdf",
        loader_cls=PyPDFLoader
    )
    documents=loader.load()
    return documents

In [6]:
data_pdf= data_loader("data/")

In [7]:
#Transforming the documents into chunks
def chunking(data):
    text_split= RecursiveCharacterTextSplitter(
        chunk_size= 500,  # Size of each chunk
        chunk_overlap=50,  # Overlap between chunks
    )
    text_chunks=text_split.split_documents(data_pdf)
    return text_chunks

In [8]:
#Creating chunks
chunks= chunking(data_pdf)
print(f"Length of chunks: {len(chunks)}")

Length of chunks: 5961


In [9]:
#Download sentence-transformers/all-MiniLM-L6-v2 embeddings model from hf library
embeddings= HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    
)
embeddings

  embeddings= HuggingFaceEmbeddings(


HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [10]:
#Testing Embeddings Model
test_query= embeddings.embed_query("What is the capital of France?")
print(f"Length of test query: {len(test_query)}")
test_query

Length of test query: 384


[0.08204811811447144,
 0.036055490374565125,
 -0.003892861772328615,
 -0.004881023429334164,
 0.02565109170973301,
 -0.057143475860357285,
 0.012191597372293472,
 0.004678927827626467,
 0.03494987264275551,
 -0.02242191694676876,
 -0.008005278185009956,
 -0.10935358703136444,
 0.022724751383066177,
 -0.029320847243070602,
 -0.04352203384041786,
 -0.1202412024140358,
 -0.0008486161823384464,
 -0.018150126561522484,
 0.0561295747756958,
 0.0030852581840008497,
 0.002336363075301051,
 -0.016839278861880302,
 0.06362470239400864,
 -0.02366023138165474,
 0.031493522226810455,
 -0.034797947853803635,
 -0.020548803731799126,
 -0.002790980739519,
 -0.011037993244826794,
 -0.036126721650362015,
 0.05414104461669922,
 -0.036617130041122437,
 -0.0250086672604084,
 -0.038170404732227325,
 -0.04960361868143082,
 -0.015148085542023182,
 0.021315043792128563,
 -0.012740432284772396,
 0.07670088857412338,
 0.04435575753450394,
 -0.010834893211722374,
 -0.029760019853711128,
 -0.016970476135611534,
 -0

In [11]:


name_index="rag-pinecone-llama2-chatbot"

doc_embed = Pinecone.from_texts(
    [chunk.page_content for chunk in chunks],
    embeddings,
    index_name=name_index,
    # Add these if required by your version:
    # pinecone_api_key=os.getenv("PINECONE_API_KEY"),
    # environment=os.getenv("PINECONE_ENVIRONMENT"),
)

In [None]:
#Retrieving top 3 results based on similarity search from the index

#Code for loading existing index
index_search= Pinecone.from_existing_index(
    index_name=name_index,
    embedding=embeddings
)

query="What are the symptoms of diabetes?"
semantic_search= index_search.similarity_search(query, k=3)

print(f"Top 3 semantic results for query '{query}': \n{semantic_search}")


Top 3 semantic results for query 'What are the symptoms of diabetes?': 
[Document(page_content='affected and can range greatly.\n• Type I diabetes mellitus. Characterized by fatigue and\nan abnormally high level of glucose in the blood\n(hyperglycemia).\n• Amyotrophic lateral schlerosis. First signs are stum-\nbling and difficulty climbing stairs. Later, muscle\ncramps and twitching may be observed as well as\nweakness in the hands making fastening buttons or\nturning a key difficult. Speech may become slowed or\nslurred. There may also be difficluty swallowing. As'), Document(page_content='begin to fall. A person with diabetes mellitus either does\nnot make enough insulin, or makes insulin that does not\nwork properly. The result is blood sugar that remains\nhigh, a condition called hyperglycemia.\nDiabetes must be diagnosed as early as possible. If\nleft untreated, it can damage or cause failure of the eyes,\nkidneys, nerves, heart, blood vessels, and other body\norgans. Hypoglycemia

In [None]:
prompt_template= """You are a medical expert. \
Given the following context from medical documents, answer the following questions.\
    If you don't know answer or no relevant documents with context have been retrieved, say 'I don't know'.\
       context:  {context}\
        question: {question}\
            Just share helpful answers, nothing else
            Helpful Answer: """

In [15]:
Prompt= PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)
chain_type_kwargs= {"prompt": Prompt}

In [17]:
#RetrievalQA chain
chatbot_chain= RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever= index_search.as_retriever(search_kwargs={'k': 3}),
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents= True
)

In [21]:
while True:
    question= input(f"Enter your query: ")
    if question.lower() == "exit":
        print("Exiting the chatbot. Goodbye!")
        break
    response= chatbot_chain.invoke({"query": question})
    print(f"Response: {response['result']}")

KeyboardInterrupt: 

In [None]:
print(f"Response: {response['result']}")

In [None]:
#RetrievalQA chain
'''qa_chain= RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=index_search.as_retriever(),
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True'''