# Import Libraries

In [27]:
import os

from IPython.display import Markdown as md
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_text_splitters import NLTKTextSplitter



## Load document

In [8]:
# Location of the paper that gets indexed for retrieval.
pdf_path = r"C:\Users\dsai9\Projects\RAG_Application\data\LeaveNoContextBehind.pdf"

# Read the PDF through the loader; the resulting documents are split further
# in the chunking step.
loader = PyPDFLoader(pdf_path)
data = loader.load_and_split()

## Chunking

In [9]:
# Break the loaded documents into overlapping chunks for embedding.
# chunk_size is a target rather than a hard limit: the splitter reports
# "Created a chunk of size ..." whenever a piece ends up longer than 500.
text_splitter = NLTKTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
chunks = text_splitter.split_documents(data)

# Quick sanity check: how many chunks were produced, and what type they are.
print(len(chunks))
print(type(chunks[0]))

Created a chunk of size 568, which is longer than the specified 500
Created a chunk of size 506, which is longer than the specified 500
Created a chunk of size 633, which is longer than the specified 500


110
<class 'langchain_core.documents.base.Document'>


##  Embeddings

In [10]:
# Build the embedding model used to vectorise the chunks and, later, the queries.
# This is Google's Generative AI embedding model (not OpenAI).
#
# The API key is read from the GOOGLE_API_KEY environment variable instead of
# being written into the notebook. Any key that has ever been saved inside a
# notebook should be treated as leaked and revoked.
embedding_model = GoogleGenerativeAIEmbeddings(
    google_api_key=os.environ["GOOGLE_API_KEY"],  # raises KeyError if the variable is unset
    model="models/embedding-001",
)

## Store in Vector DB

In [12]:
# Build the vector store once and reuse it on later runs.
#
# Chroma.from_documents() adds the chunks to whatever collection already lives
# in persist_directory, so re-running this cell against an existing store would
# insert every chunk a second time and the retriever would start returning
# duplicates. Delete the directory to force a rebuild (for example after
# changing the PDF or the chunking parameters).
chroma_dir = r"C:\Users\dsai9\Projects\RAG_Application\ChromaDB"

if os.path.isdir(chroma_dir) and os.listdir(chroma_dir):
    # A store is already on disk: open it instead of embedding everything again.
    db = Chroma(persist_directory=chroma_dir, embedding_function=embedding_model)
else:
    # First run: embed each chunk and load it into the vector store.
    db = Chroma.from_documents(chunks, embedding_model, persist_directory=chroma_dir)

    # Persist the database on drive.
    # NOTE(review): newer Chroma releases persist automatically and deprecate
    # this call — confirm against the installed version.
    db.persist()

In [13]:
# Open the persisted Chroma store from disk, passing the same embedding model
# that was used when the chunks were stored.
db_connection = Chroma(
    embedding_function=embedding_model,
    persist_directory=r"C:\Users\dsai9\Projects\RAG_Application\ChromaDB",
)

## Retrieving

In [14]:
# Expose the Chroma connection as a retriever; k is the number of chunks
# requested for each query.
top_k = 5
retriever = db_connection.as_retriever(search_kwargs={"k": top_k})

print(type(retriever))

<class 'langchain_core.vectorstores.VectorStoreRetriever'>


## Creating Chain

In [20]:
# Read the Gemini API key from a local text file, dropping surrounding
# whitespace such as a trailing newline.
key_path = r'C:\Users\dsai9\Projects\RAG_Application\GEMINI_API_KEY.txt'
with open(key_path, 'r') as key_file:
    GOOGLE_API_KEY = key_file.read().strip()

In [21]:
# Prompt for the RAG chain: a fixed system instruction followed by a human
# message with two placeholders, {context} and {question}. Both must be
# supplied when the template is invoked, otherwise it raises a KeyError
# about missing variables.
chat_template = ChatPromptTemplate.from_messages([
    # System Message Prompt Template
    SystemMessage(content="""You are a Helpful AI Bot. 
    You take the context and question from user. Your answer should be based on the specific context."""),
    # Human Message Prompt Template
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context:
    {context}
    
    Question: 
    {question}
    
    Answer: """)
])


In [29]:

# Gemini chat model that produces the answer from the filled-in prompt.
chat_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro-latest",
    google_api_key=GOOGLE_API_KEY,
)

In [30]:
# Final step of rag_chain: applied to the chat model's output so the chain
# yields a plain string.
output_parser = StrOutputParser()

In [31]:
def format_docs(docs):
    """Join the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values concatenated in order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# Inputs for the prompt: "context" runs the retriever on the incoming question
# and formats the retrieved documents; "question" passes the incoming value
# through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build prompt inputs -> fill the prompt -> call the model -> parse to text.
rag_chain = chain_inputs | chat_template | chat_model | output_parser

In [35]:
# Run one question through the RAG chain and show the raw answer string.
question = "What does the research  Paper say?"
response = rag_chain.invoke(question)

response

'## Summary of the Research Paper Based on the Provided Context:\n\nThe research paper focuses on exploring efficient and practical methods for compressing memory in Large Language Models (LLMs). While existing techniques like system-level optimization have attempted to improve efficiency, they often lack the balance between simplicity and quality.\n\n**Key points of the paper:**\n\n* **Motivation:** LLMs currently lack effective and practical memory compression techniques that are both simple and maintain high quality.\n* **Proposed Approach:** The paper introduces a novel approach that leverages linear attention mechanisms for memory update and retrieval processes. This approach offers simplicity and competitive performance.\n* **Methodology:**\n    * The researchers adopt the update rule and retrieval mechanism proposed by Katharopoulos et al. (2020) due to its simplicity and effectiveness.\n    * They cast the memory update and retrieval process as a linear attention mechanism, ena

In [36]:
# Render the answer as formatted Markdown instead of the raw string repr.
# `md` (IPython.display.Markdown) is imported in the notebook's import cell.
md(response)

## Summary of the Research Paper Based on the Provided Context:

The research paper focuses on exploring efficient and practical methods for compressing memory in Large Language Models (LLMs). While existing techniques like system-level optimization have attempted to improve efficiency, they often lack the balance between simplicity and quality.

**Key points of the paper:**

* **Motivation:** LLMs currently lack effective and practical memory compression techniques that are both simple and maintain high quality.
* **Proposed Approach:** The paper introduces a novel approach that leverages linear attention mechanisms for memory update and retrieval processes. This approach offers simplicity and competitive performance.
* **Methodology:**
    * The researchers adopt the update rule and retrieval mechanism proposed by Katharopoulos et al. (2020) due to its simplicity and effectiveness.
    * They cast the memory update and retrieval process as a linear attention mechanism, enabling the use of stable training techniques from related methods.
* **Evaluation:**
    * The paper reports token-level retrieval accuracy for passkeys hidden within long input sequences (ranging from 32K to 1M tokens) at different positions (start, middle, end). This evaluation helps assess the effectiveness of the proposed memory compression technique.

**Overall, the research aims to address the challenge of efficient memory usage in LLMs by proposing a simple yet effective approach based on linear attention mechanisms.** 


KeyError: "Input to ChatPromptTemplate is missing variables {'context'}.  Expected: ['context', 'question'] Received: ['question']"