In [1]:
from langchain_community.vectorstores import Chroma
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, Tool

from dotenv import load_dotenv
import os
import fitz

In [2]:
# Load variables from a local .env file into the process environment.
# Presumably this is where the OpenAI API key used by the clients below comes from — confirm.
load_dotenv()

True

In [3]:
# Relative paths to two PDFs of the paper; judging by the names, one is an annotated copy and one is the original.
# NOTE(review): "fire_dir" looks like a typo for "file_path"; the names are left unchanged because later cells reference them.
fire_dir_with_notes="../../../../NOTES/papers/inprogress/Attention-is-all-you-need.pdf"
fire_dir_origin="../../../../NOTES/papers/inprogress/Attentionisallyouneed.pdf"

In [4]:
# The Chroma collection lives in <current working directory>/db/chroma_db.
persistent_directory = os.path.join(os.getcwd(), "db", "chroma_db")
db_dir = os.path.dirname(persistent_directory)

In [5]:
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``fitz.open``.

    Returns:
        str: The text of all pages in page order, separated by a newline,
        with leading and trailing whitespace stripped.
    """
    doc = fitz.open(pdf_path)  # open pdf file
    try:
        # Collect the per-page text and join once instead of growing a
        # string inside the loop.
        page_texts = [page.get_text("text") for page in doc]
    finally:
        # Release the underlying file handle even if extraction fails.
        doc.close()
    return "\n".join(page_texts).strip()

In [6]:
# Extract the text of the PDF at fire_dir_origin; this string is what a later cell chunks and embeds.
pdf_text=extract_text_from_pdf(fire_dir_origin)

In [7]:
# Sanity check: the extractor should have returned one plain string.
type(pdf_text)

str

In [8]:
def create_vector_store(text:str,metadata:str,chunk_size:int=1000,chunk_overlap:int=100,embedding_model_name:str="text-embedding-3-small"):
    """Chunk ``text``, embed the chunks and store them in a persistent Chroma collection.

    Args:
        text: Full document text to index.
        metadata: Value stored under the ``"source"`` metadata key of every chunk.
        chunk_size: Target chunk size handed to ``CharacterTextSplitter``.
        chunk_overlap: Overlap between neighbouring chunks handed to the splitter.
        embedding_model_name: Name of the OpenAI embedding model.

    Returns:
        The vector store returned by ``Chroma.from_documents``; it is written
        to the notebook-level ``persistent_directory``.
    """
    # Wrap the text supplied by the caller. The previous version read the
    # notebook-global ``pdf_text`` here and silently ignored ``text``.
    documents = [Document(page_content=text, metadata={"source": metadata})]

    # Split the document into chunks. The splitter can emit chunks longer than
    # ``chunk_size``; it logs a warning when it does.
    text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    docs = text_splitter.split_documents(documents)

    # Define the embedding model
    embeddings = OpenAIEmbeddings(model=embedding_model_name)

    # Embed the chunks and write them to the on-disk Chroma collection
    db = Chroma.from_documents(
        docs, embeddings, persist_directory=persistent_directory
    )

    return db

In [9]:
# Build the vector store from the extracted text; every chunk is tagged with fire_dir_with_notes as its source.
# NOTE(review): the text was extracted from fire_dir_origin but the source metadata points at fire_dir_with_notes — confirm this is intended.
# NOTE(review): re-running this cell presumably adds the same chunks to the persisted collection again — verify before re-executing.
db=create_vector_store(text=pdf_text,metadata=fire_dir_with_notes)

Created a chunk of size 2857, which is longer than the specified 1000
Created a chunk of size 4255, which is longer than the specified 1000
Created a chunk of size 1826, which is longer than the specified 1000
Created a chunk of size 2507, which is longer than the specified 1000
Created a chunk of size 3192, which is longer than the specified 1000
Created a chunk of size 3472, which is longer than the specified 1000
Created a chunk of size 3320, which is longer than the specified 1000
Created a chunk of size 3193, which is longer than the specified 1000
Created a chunk of size 2972, which is longer than the specified 1000
Created a chunk of size 3111, which is longer than the specified 1000
Created a chunk of size 3215, which is longer than the specified 1000
Created a chunk of size 3213, which is longer than the specified 1000


In [10]:
# Next, use the RAG vector store together with the LLM

In [11]:
# Re-open the persisted Chroma collection from disk.

# The embedding model must be the same one that was used to build the collection.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Attach the embedding function to the on-disk store.
pdf_db = Chroma(
    embedding_function=embeddings,
    persist_directory=persistent_directory,
)

  pdf_db = Chroma(persist_directory=persistent_directory,


In [12]:
# Similarity-search retriever over the vector store built above; k=1 asks for one chunk per query.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

In [13]:
# Chat model shared by the history-aware retriever, the QA chain and the agent below.
llm = ChatOpenAI(model="gpt-4o")

In [14]:
from langchain.memory import ConversationBufferMemory

In [15]:
# Conversation memory: the stored messages are exposed under the "history" key,
# the same variable name the prompts' history placeholder uses.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="history",
)

  memory = ConversationBufferMemory(


In [16]:
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder,AIMessagePromptTemplate

In [17]:
# Prompt for the history-aware retriever: use the conversation history to rewrite a short
# follow-up question into a standalone search query, so retrieval does not miss on terse questions.
#
# The model's reply is used verbatim as the search query, so this prompt must ask for a plain
# rephrased question only. The structured-chat agent instructions (JSON action blobs, tools)
# that were pasted here belong to the agent prompt, not to this step.
#
# NOTE(review): per the LangChain docs, create_history_aware_retriever only runs this prompt when
# the chain input has a non-empty `chat_history` key; the tool defined later passes the messages
# under `history`, so confirm the rephrasing step is actually reached.

system_message=SystemMessagePromptTemplate.from_template(
    """Given the following conversation and the input question, rephrase the input question to be a standalone question that can be understood without the conversation.
Do NOT answer the question. Return only the rephrased question as plain text. If the question is already standalone, return it unchanged."""
)

# Optional slot for earlier conversation turns (also reused by the QA prompt).
history_placeholder=MessagesPlaceholder(variable_name="history",optional=True)

# User turn with the agent scratchpad slot; kept unchanged because the QA prompt reuses it.
human_message=HumanMessagePromptTemplate.from_template(
    """{input}
    
    {agent_scratchpad}
    """
)


# Only the question is needed for rephrasing, so this prompt does not require `agent_scratchpad`.
contextualize_prompt=ChatPromptTemplate.from_messages([
    system_message,
    history_placeholder,
    ("human", "{input}")
])

In [18]:
# Wrap the retriever so the question can be reformulated from the chat history before searching.
history_aware_retriever = create_history_aware_retriever(
    prompt=contextualize_prompt,
    retriever=retriever,
    llm=llm,
)

In [19]:
# System prompt for the question-answering step: the retrieved documents are inserted at {context}.
# This chain has no tools and no output parser, so the structured-chat agent instructions
# (JSON action blobs) that were pasted here are removed; the model answers in plain text.
system_message=SystemMessagePromptTemplate.from_template(
    """
    You are an AI assistant. Use the retrieved context below to answer questions.
    If you don't know the answer, just say so.
    {context}
    
    **Answer each question in 3 short sentences. Make the answer concise and clear**
    """
)

# Reuses the history placeholder and the human message template defined in the earlier prompt cell.
qa_prompt=ChatPromptTemplate.from_messages([
    system_message,
    history_placeholder,
    human_message
])

In [20]:
# "Stuff" chain: every retrieved Document is placed into the prompt's {context} slot
# and the whole prompt is sent to the LLM in a single call.

qa_chain = create_stuff_documents_chain(
    prompt=qa_prompt,
    llm=llm,
)

In [21]:
# Full RAG pipeline:
#   1. history_aware_retriever fetches documents for the (possibly reformulated) question;
#   2. qa_chain receives those documents as context and produces the final answer.
rag_chain = create_retrieval_chain(
    combine_docs_chain=qa_chain,
    retriever=history_aware_retriever,
)

In [22]:
def _answer_with_local_content(input, **kwargs):
    """Run the RAG chain for the agent's question and return the chain's full result.

    The current conversation messages are passed under "history" and the scratchpad
    slot is filled with an empty string. Extra keyword arguments are accepted and ignored.
    """
    # NOTE(review): create_history_aware_retriever appears to look for a `chat_history` key
    # rather than `history` — confirm the question-rewriting step is reached.
    payload = {
        "input": input,
        "history": memory.chat_memory.messages,
        "agent_scratchpad": "",
    }
    return rag_chain.invoke(payload)


# The single tool the agent can call: question answering over the local vector store.
tools = [
    Tool(
        name="Answer Question with local content",
        func=_answer_with_local_content,
        description="Use RAG to answer questions based on retrieved context on local laptop.",
    )
]


In [23]:
# System prompt for the structured-chat agent. {tools} and {tool_names} are template
# variables; the doubled braces {{ }} render as literal braces in the JSON examples.
system_message=SystemMessagePromptTemplate.from_template(
    """ Respond to the human as helpfully and accurately as possible. You have access to the following tools:
        {tools}
        Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
        Valid "action" values: "Final Answer" or {tool_names}.
        Provide only ONE action per $JSON_BLOB, as shown:
        ```
        {{
        "action": $TOOL_NAME,
        "action_input": $INPUT
        }}
        ```
        Follow this format:

        Question: input question to answer
        Thought: consider previous and subsequent steps
        Action:
        ```
        $JSON_BLOB
        ```
        Observation: action result
        ... (repeat Thought/Action/Observation N times)
        Thought: I know what to respond
        Action:
        ```
        {{
        "action": "Final Answer",
        "action_input": "Final response to human"
        }}
        ```        
        Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation
     
        """
)

# Optional slot for earlier conversation turns, filled from the "history" variable.
history_placeholder=MessagesPlaceholder(variable_name="history",optional=True)

# NOTE(review): this placeholder is not referenced by any prompt visible in this notebook;
# the scratchpad is injected through the {agent_scratchpad} slot of human_message instead.
scratchpad_placeholder = MessagesPlaceholder(variable_name="agent_scratchpad", optional=True)

# User turn: the question, followed by the agent's intermediate work so far.
human_message=HumanMessagePromptTemplate.from_template(
    """{input}
    
    {agent_scratchpad}
    
    (reminder to respond in a JSON blob no matter what)"""
)

# Fixed assistant message for the agent prompt.
# NOTE(review): "assisstant" is misspelled; the name is kept because the prompt-assembly cell references it.
assisstant_message=AIMessagePromptTemplate.from_template(
    "Understood! I will analyze your question type and provide a detailed response."
)

In [24]:
# Agent prompt, in order: system instructions, optional history, the user turn,
# then the canned assistant acknowledgement.
agent_messages = [
    system_message,
    history_placeholder,
    human_message,
    assisstant_message,
]
prompt_template = ChatPromptTemplate.from_messages(agent_messages)

In [25]:
from langchain.agents import AgentExecutor, create_structured_chat_agent

In [26]:
# Structured-chat agent: the LLM picks a tool (or "Final Answer") by emitting a JSON blob, as instructed by prompt_template.
agent = create_structured_chat_agent(llm=llm, tools=tools, prompt=prompt_template)

In [27]:
# Executor that runs the agent with its tools and the shared conversation memory.
# handle_parsing_errors=True asks the executor to handle malformed model output itself
# rather than raising; verbose=True prints the intermediate steps.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    memory=memory,
    handle_parsing_errors=True,
    verbose=True,
)

In [28]:
# A question the agent can answer without calling the tool.
request = {
    "input": "What is 15^2?",
    "history": memory.chat_memory.messages,
}
response = agent_executor.invoke(request)

print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLet's perform the calculation:

15^2 means 15 multiplied by 15, which equals 225.

Here is the response in a JSON blob as requested:

```json
{
    "action": "Final Answer",
    "action_input": "15^2 equals 225."
}
```[0m

[1m> Finished chain.[0m
{'input': 'What is 15^2?', 'history': [HumanMessage(content='What is 15^2?', additional_kwargs={}, response_metadata={}), AIMessage(content='15^2 equals 225.', additional_kwargs={}, response_metadata={})], 'output': '15^2 equals 225.'}


In [29]:
# A question that asks explicitly for the locally indexed content.
request = {
    "input": "What is transformer, answer it with local knowledge.",
    "history": memory.chat_memory.messages,
}
response = agent_executor.invoke(request)

print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction:
```
{
  "action": "Answer Question with local content",
  "action_input": "What is transformer?"
}
```[0m[36;1m[1;3m{'input': 'What is transformer?', 'history': [HumanMessage(content='What is 15^2?', additional_kwargs={}, response_metadata={}), AIMessage(content='15^2 equals 225.', additional_kwargs={}, response_metadata={})], 'agent_scratchpad': '', 'context': [Document(metadata={'source': '../../../../NOTES/papers/inprogress/Attention-is-all-you-need.pdf'}, page_content='Figure 1: The Transformer - model architecture.\nThe Transformer follows this overall architecture using stacked self-attention and point-wise, fully\nconnected layers for both the encoder and decoder, shown in the left and right halves of Figure 1,\nrespectively.\n3.1\nEncoder and Decoder Stacks\nEncoder:\nThe encoder is composed of a stack of N = 6 identical layers. Each layer has two\nsub-layers. The first is a multi-head self-attention mechan

In [30]:
# Show only the agent's final answer from the previous call.
response['output']

'The Transformer is a model architecture that utilizes stacked self-attention and point-wise, fully connected layers in both its encoder and decoder components. The encoder consists of a stack of six identical layers, each with a multi-head self-attention mechanism and a position-wise feed-forward network, employing residual connections and layer normalization. Similarly, the decoder also stacks six layers, adding a third sub-layer for attending to encoder outputs and ensuring predictions depend only on preceding positions through masking.'

In [31]:
# A follow-up that depends on the earlier turns ("the paper") and on the source path stored with the chunks.
request = {
    "input": "Where is the paper exist on my laptop?",
    "history": memory.chat_memory.messages,
}
response = agent_executor.invoke(request)

print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction:
```
{
  "action": "Answer Question with local content",
  "action_input": "Where is the Transformer paper located on my laptop?"
}
```[0m[36;1m[1;3m{'input': 'Where is the Transformer paper located on my laptop?', 'history': [HumanMessage(content='What is 15^2?', additional_kwargs={}, response_metadata={}), AIMessage(content='15^2 equals 225.', additional_kwargs={}, response_metadata={}), HumanMessage(content='What is transformer, answer it with local knowledge.', additional_kwargs={}, response_metadata={}), AIMessage(content='The Transformer is a model architecture that utilizes stacked self-attention and point-wise, fully connected layers in both its encoder and decoder components. The encoder consists of a stack of six identical layers, each with a multi-head self-attention mechanism and a position-wise feed-forward network, employing residual connections and layer normalization. Similarly, the decoder also stac

In [32]:
# Show only the agent's final answer from the previous call.
response['output']

'The Transformer paper, titled "Attention is All You Need," is located at the following path on your laptop: `../../../../NOTES/papers/inprogress/Attention-is-all-you-need.pdf`.'