In [3]:
# Import the os module to interact with the operating system environment variables
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Read PDF File

In [4]:
# Path to the source manual, relative to the notebook's working directory.
file_path = "heartstart RAG.pdf"
loader = PyPDFLoader(file_path)

# Load the PDF into a list of LangChain Document objects.
docs = loader.load()

# Sanity check: number of Documents loaded (300 in the recorded run).
print(len(docs))

300


In [5]:
# Show the text of the first loaded Document followed by its metadata,
# one per line, to confirm the PDF was parsed as expected.
print(docs[0].page_content, docs[0].metadata, sep="\n")

Instructions for Use
HeartStart Intrepid
Monitor /D efibrillator
867172 
 English

{'source': 'heartstart RAG.pdf', 'page': 0}


# Chunking Text

In [6]:
# Split the loaded Documents into overlapping chunks for embedding.
# chunk_size / chunk_overlap are the splitter's size units (characters by
# default — TODO confirm no custom length_function is expected here).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [1]:
# `langchain.embeddings` is a deprecated import path; the class lives in
# langchain_community, which this notebook already depends on.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Pin the embedding model explicitly instead of relying on the library default.
# The recorded `print(embeddings)` output shows this is the model in use and
# that it produces 768-dimensional vectors, which must match the Pinecone
# index dimension configured when the index is created.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)




# Create Pinecone index

In [None]:
from pinecone import Pinecone, ServerlessSpec
import time

INDEX_NAME = "chatbotqa-index"
USE_SERVERLESS = True
# Vector size of the index; must equal the embedding model's output size.
EMBEDDING_DIMENSION = 768
# Upper bound on how long to poll for index readiness before giving up.
INDEX_READY_TIMEOUT_SECONDS = 300

# Serverless deployment target for the index (AWS, us-east-1).
spec = ServerlessSpec(cloud='aws', region='us-east-1')

# Pinecone client; the API key is read from the environment, never hardcoded.
pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))

if USE_SERVERLESS:
    index_already_exists = INDEX_NAME in pc.list_indexes().names()

    if index_already_exists:
        print(f"Index `{INDEX_NAME}` already exists")
    else:
        # Create the index with the dimension and metric used by the embeddings.
        pc.create_index(
            name=INDEX_NAME,
            dimension=EMBEDDING_DIMENSION,
            metric="cosine",
            spec=spec
        )

    # Poll until the index reports ready. This also runs for a pre-existing
    # index, which may still be initializing from an earlier run. The deadline
    # prevents the cell from hanging forever if the index never becomes ready.
    deadline = time.monotonic() + INDEX_READY_TIMEOUT_SECONDS
    while not pc.describe_index(INDEX_NAME).status['ready']:
        if time.monotonic() > deadline:
            raise TimeoutError(
                f"Index `{INDEX_NAME}` was not ready after "
                f"{INDEX_READY_TIMEOUT_SECONDS} seconds"
            )
        # Sleep between polls to avoid hammering the API.
        time.sleep(1)

    # Only claim creation when this run actually created the index.
    if not index_already_exists:
        print(f"Index with name `{INDEX_NAME}` is created")

    # Handle to the index, defined on both paths.
    index = pc.Index(INDEX_NAME)
    # Upper-case alias kept for backward compatibility: the previous version of
    # this cell bound `INDEX` when the index already existed.
    INDEX = index

    # Print index statistics once (dimension, metric, vector counts, ...).
    print(index.describe_index_stats())

Index with name `chatbotqa-index` is created
{'dimension': 768,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}


# Insert into Pinecone Vector DB Index

In [57]:
from langchain_pinecone import PineconeVectorStore

# Make this cell safe to re-run: uploading the same chunks again would add a
# second copy of every vector (new IDs are generated per upload unless explicit
# ids are supplied), which skews retrieval towards duplicated passages.
# Only ingest when the index is empty; otherwise just connect to it.
existing_vector_count = pc.Index(INDEX_NAME).describe_index_stats().total_vector_count

if existing_vector_count == 0:
    # Embed every chunk and upsert it into the Pinecone index.
    docsearch = PineconeVectorStore.from_documents(
        splits,
        index_name=INDEX_NAME,
        embedding=embeddings
    )
else:
    print(
        f"Index `{INDEX_NAME}` already holds {existing_vector_count} vectors; "
        "skipping ingestion"
    )
    docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)

In [55]:
# Show the embedding wrapper's configuration (model name, pooling, output size).
print(embeddings)

client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
) model_name='sentence-transformers/all-mpnet-base-v2' cache_folder=None model_kwargs={} encode_kwargs={} multi_process=False show_progress=False


# Retrieve context

In [42]:
# Connect to the already-populated Pinecone index without re-ingesting.
# The import and INDEX_NAME are repeated here so this cell can run without the
# index-creation and ingestion cells; `embeddings` must already be defined.
from langchain_pinecone import PineconeVectorStore
INDEX_NAME = "chatbotqa-index"
docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)

In [74]:
# Retriever over the vector store; "k": 3 requests three matches per query
# (the recorded run below prints three metadata entries).
# NOTE(review): `retriver` is a misspelling of "retriever", but later cells
# reference this exact name, so it is left unchanged here.
retriver = docsearch.as_retriever(search_kwargs={"k": 3})
results = retriver.invoke("How to install the battery?")


In [80]:
# Print only the metadata (source file and page) of each retrieved chunk,
# rather than dumping the full chunk text.
for doc in results:
    print(doc.metadata)

{'page': 32.0, 'source': 'heartstart RAG.pdf'}
{'page': 32.0, 'source': 'heartstart RAG.pdf'}
{'page': 4.0, 'source': 'heartstart RAG.pdf'}


# Model Setup

In [44]:
# `langchain.llms` is a deprecated import path; the class lives in
# langchain_community, which this notebook already depends on.
from langchain_community.llms import HuggingFaceHub
import os

# Hosted Mixtral instruct model on the Hugging Face Hub.
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# The API token is read from the environment, never hardcoded.
# NOTE(review): the recorded outputs further down echo the whole prompt before
# the answer. If that is unwanted, consider adding "return_full_text": False to
# model_kwargs — confirm against the installed langchain_community version first.
llm = HuggingFaceHub(
    repo_id=repo_id,
    model_kwargs={"temperature": 0.8, "top_k": 50},
    huggingfacehub_api_token=os.getenv('HUGGING_FACE_API_TOKEN'),
)

# Create Prompt 

In [117]:
# `from langchain import PromptTemplate` is a deprecated import path; use
# langchain_core, which this notebook already imports elsewhere.
from langchain_core.prompts import PromptTemplate

# Mixtral instruct format: the whole user turn (instructions, context and
# question) sits between [INST] and [/INST], and generation starts right after
# [/INST]. The previous template closed the turn with "</s> [/INST]" (an
# end-of-sequence token inside the instruction) and placed the context and
# question after the closing tag, outside the instruction block.
template = """<s>[INST] You are an expert in operating and maintaining the Philips HeartStart Intrepid monitor/defibrillator.
Users will ask you questions about the device and how to maintain and operate it.
Use the following context to answer the question and return only your response.
Use only the context to answer, do not give references. Simply answer the question without editorial comments.
If you don't know the answer, just say you don't know.
Keep the answer concise and within 2 sentences.

Context: {context}
Question: {question} [/INST]"""

# Template variables: `context` (retrieved text) and `question` (user query).
prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"]
)

# Chain everything together

In [67]:
# `langchain.schema.*` are deprecated import paths; use langchain_core.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# LCEL pipeline: retrieve -> build prompt -> call the LLM -> plain string.
# The retrieved Documents are joined into plain text before they reach the
# prompt; previously the raw list of Document objects was interpolated, so the
# model saw their Python repr (including metadata) instead of the manual text.
rag_chain = (
    {
        "context": retriver
        | (lambda retrieved: "\n\n".join(d.page_content for d in retrieved)),
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [118]:
from langchain.chains.question_answering import load_qa_chain

# Use one question string for both retrieval and answering. The previous
# version retrieved with a misspelled query ("How to installthe battery") that
# differed from the question actually sent to the model.
question = "How to install the battery?"

# "stuff" chain: all retrieved documents are placed into a single prompt.
chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

# `.invoke(...)` replaces the deprecated direct call `chain({...})`.
response = chain.invoke(
    {"input_documents": retriver.invoke(question), "question": question},
    return_only_outputs=False,
)
response



  Document(page_content='1 Push the Battery Latch up. \n2 The battery will eject out of the compartment. If it does not, pull on the Battery Tab to \ncompletely remove the battery.\n\uf046\uf069\uf067\uf075\uf072\uf065\uf031\uf038 Installing the Battery\nBattery Latch\nBattery Tab', metadata={'page': 32.0, 'source': 'heartstart RAG.pdf'}),
  Document(page_content='Installing the Battery     .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .   19\nRemoving the Battery   .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .   19\nBattery Fuel Gauge .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .   20\nAC Power Cord Guard .      .      .      .      .      .      .      .      .      .      .      .      .      . 

In [95]:
# Alternative wiring of the same pipeline using the RetrievalQA helper.
# NOTE(review): this rebinds `rag_chain`, which the LCEL cell above also
# defines, so which chain later cells use depends on execution order.
# NOTE(review): `docsearch.as_retriever()` is called without search_kwargs
# here, unlike `retriver` (k=3) — confirm the difference is intended.
# NOTE(review): ConversationalRetrievalChain is imported but not used in this cell.
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
rag_chain = RetrievalQA.from_chain_type(
            llm, retriever=docsearch.as_retriever(), chain_type_kwargs={"prompt": prompt}
        )
rag_chain.invoke("How to install the battery?")



{'query': 'How to install the battery?',

In [68]:
# Run the chain on a sample question, then show the answer and its Python type
# (one per line) to confirm what kind of object the chain returns.
result = rag_chain.invoke("How to install the battery?")
print(result, type(result), sep="\n")




You are an expert in operating and maintaining the "Philips HeartStart Intrepid monitor/defibrillator"
Users will ask you questions about the device and how to maintain and operate it. 
Use following piece of context to answer the question. 
If you don't know the answer, just say you don't know. 
Keep the answer within 2 sentences and concise.


Question: How to install the battery?
Return the output as a json with one ker as answe which will have the llm response and the other key as page_number which will be the page numbers from where the context is taken
{
"llm_response": "To install the lithium ion battery:\n1 Align the battery in the battery compartment. Confirm the arrow onthe Battery Tab is positioned at the bottom, see Figure 18.\n2 Push the battery into the battery compartment until the battery latch is locked into plac.",
"page_number": 32.0
}
<class 'str'>


In [103]:
# Chat-style prompt with a system message (role and rules) and a human message
# carrying the retrieved context and the user query.
# Template variables: `context_str` and `query_str`.
# NOTE(review): this rebinds `prompt`, replacing the PromptTemplate defined
# earlier (which used `context` / `question`); cells that run after this one
# see this chat prompt instead.
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are an expert in operating and maintaining the 'Philips HeartStart Intrepid monitor/defibrillator'. Users will ask you questions about the device and how to maintain and operate it. \nAlways answer the query using the provided context information, and not prior knowledge.\nSome rules to follow:\n1. Never directly reference the given context in your answer.\n2. If you don't know the answer, just say you don't know."),
            ("human", "Context information is below.\n---------------------\n{context_str}\n---------------------\nGiven the context information and not prior knowledge, answer the query.\nQuery: {query_str}\nAnswer: "),
        ]
    )


In [104]:
def format_docs(docs):
    """Join the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [entry.page_content for entry in docs]
    return "\n\n".join(page_texts)

In [105]:
# Answering half of the pipeline: takes a mapping that already contains the
# retrieved Documents under "context_str", replaces them with their joined
# text, fills the prompt, calls the LLM and parses the reply with StrOutputParser.
rag_chain_from_docs = (
    RunnablePassthrough.assign(
        context_str=lambda payload: format_docs(payload["context_str"])
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [106]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

# Retrieval half of the pipeline: run the retriever and pass the raw query
# through side by side, then attach the generated answer under "answer" so the
# result holds the source Documents, the query and the answer together.
rag_chain_with_source = RunnableParallel(
    context_str=docsearch.as_retriever(),
    query_str=RunnablePassthrough(),
).assign(answer=rag_chain_from_docs)

In [107]:
# Run the source-preserving chain; `result` is a mapping with the keys
# "context_str", "query_str" and "answer".
result = rag_chain_with_source.invoke("How to change the battery?")



In [108]:
# Display the full result mapping.
# NOTE(review): this dumps every retrieved Document; consider showing only
# result["answer"] and the documents' metadata to keep the output readable.
result

{'context_str': [Document(page_content='1 Push the Battery Latch up. \n2 The battery will eject out of the compartment. If it does not, pull on the Battery Tab to \ncompletely remove the battery.\n\uf046\uf069\uf067\uf075\uf072\uf065\uf031\uf038 Installing the Battery\nBattery Latch\nBattery Tab', metadata={'page': 32.0, 'source': 'heartstart RAG.pdf'}),
  Document(page_content='Installing the Battery     .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .   19\nRemoving the Battery   .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .   19\nBattery Fuel Gauge .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .      .   20\nAC Power Cord Guard .      .      .      .      .      .      .      .      .      .      .      .

In [85]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# create_stuff_documents_chain injects the retrieved documents under the
# `context` variable and create_retrieval_chain passes the user query under
# `input`. The notebook-level `prompt` uses `context_str` / `query_str`, so a
# prompt with the expected variable names is defined for this chain.
retrieval_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert in operating and maintaining the 'Philips HeartStart Intrepid monitor/defibrillator'. "
            "Users will ask you questions about the device and how to maintain and operate it. \n"
            "Always answer the query using the provided context information, and not prior knowledge.\n"
            "Some rules to follow:\n"
            "1. Never directly reference the given context in your answer.\n"
            "2. If you don't know the answer, just say you don't know.",
        ),
        (
            "human",
            "Context information is below.\n"
            "---------------------\n"
            "{context}\n"
            "---------------------\n"
            "Given the context information and not prior knowledge, answer the query.\n"
            "Query: {input}\n"
            "Answer: ",
        ),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, retrieval_prompt)
rag_chain = create_retrieval_chain(docsearch.as_retriever(), question_answer_chain)

# Invoke the chain built in this cell. The previous version invoked
# `rag_chain_with_source`, leaving the chain built here unused.
result = rag_chain.invoke({"input": "How to change the battery?"})
print(result["answer"])



System: You are an expert in operating and maintaining the 'Philips HeartStart Intrepid monitor/defibrillator'. Users will ask you questions about the device and how to maintain and operate it. 
Always answer the query using the provided context information, and not prior knowledge.
Some rules to follow:
1. Never directly reference the given context in your answer.
2. If you don't know the answer, just say you don't know.
Human: Context information is below.
---------------------
1 Push the Battery Latch up. 
2 The battery will eject out of the compartment. If it does not, pull on the Battery Tab to 
completely remove the battery.
 Installing the Battery
Battery Latch
Battery Tab

Battery and AC Power 2: Device Basics
 19
Battery and AC Power
This section describes basics of power supply. See “Power” on page 31 for a detailed discussion. 
Installing the Battery
 To install the lithium ion battery:
1 Align the battery in the battery compartment. Confirm the arrow on the Battery