# Dependencies

In [98]:
from langchain_community.document_loaders import PyPDFLoader,TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from openai import OpenAI
import requests

from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from pinecone import Pinecone, ServerlessSpec
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate


from langchain.agents import initialize_agent, Tool
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from langchain.agents.format_scratchpad.openai_tools import (
    format_to_openai_tool_messages,
)
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain.agents import AgentExecutor

from langchain_core.tools import tool
from pydantic.v1 import BaseModel, Field
from openai import OpenAI

from pydantic import BaseModel, Field, ValidationError
from typing import List, Union, Dict, Optional
from pinecone import Index

import validators
import requests
import os
import uuid

from dotenv import load_dotenv
load_dotenv()

import os
import sys
import time

# Credentials and index configuration read from the environment (after
# load_dotenv() has run); each value is None when its variable is unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_API_ENV = os.getenv("PINECONE_API_ENV")
PINECONE_KNOWLEDGE_BASE_INDEX_NAME = os.getenv("PINECONE_KNOWLEDGE_BASE_INDEX_NAME")

# Client Initialisation
Now, we will initialise the OpenAI client, which we use to get the embeddings needed by our model, along with the other necessary initialisations.

In [2]:
# OpenAI client; embedding_model is its embeddings resource, which
# get_embedding calls to embed text.
openAI_client = OpenAI(api_key=OPENAI_API_KEY)
embedding_model = openAI_client.embeddings
# Pinecone client and a handle to the knowledge-base index whose name comes
# from the environment.
pc = Pinecone(api_key = PINECONE_API_KEY, environment = PINECONE_API_ENV)
index = pc.Index(PINECONE_KNOWLEDGE_BASE_INDEX_NAME)

def get_embedding(text: str) -> List[float]:
    """
        Convert a text string into an embedding using text-embedding-3-small from OpenAI

        Args:
            text : A string which will contain either the text chunk or the user query

        Returns:
            vector : The embedding of the input as a list of floats
                     (1536 dimensions for this model by default)

        Raises:
            Exception : If the embeddings request fails or the response has no
                        data; the original error is attached as __cause__
    """

    try:
        response = embedding_model.create(
            input=text,
            model="text-embedding-3-small"
        )

        # A single input string yields a single embedding at index 0.
        return response.data[0].embedding

    except Exception as e:
        # Keep the generic Exception type (and message) that existing callers
        # may rely on, but chain the original error explicitly so its type and
        # traceback stay available for debugging.
        raise Exception(str(e)) from e
    

Now, to use the above, we need to create these functions as a collective tool, which can be used by an agent to query the vector DB.

# Tool Creation
Now, we would have to create tools for this purpose. These are functions which embed the query, query the vector DB with it, and eventually return the relevant texts (in this notebook they are plain synchronous functions). As the admin id will be a fixed one, we will be directly referring to it from the environment.

In [104]:
def get_relevant_chunks(query: str, userID: str, top_k: int = 5) -> List[Dict]:
    """
    Find relevant documents for a given query and userID.
    
    Args:
    - query: The search query
    - userID: The ID of the user; only records whose "userID" metadata
      matches this value are searched
    - top_k: Maximum number of matches to request from the index (default 5)
    
    Returns:
    List of relevant document chunks, one dict per match with the keys
    'score', 'text', 'name' and 'reference'
    """
    query_vector = get_embedding(query)
    results = index.query(
        vector=query_vector,
        top_k=top_k,
        include_values=False,
        include_metadata=True,
        filter={
            "userID": userID
        }
    )

    return [
        {
            'score': record['score'],
            'text': record['metadata']['chunk'],
            'name': record['metadata']['document_name'],
            # Stored page_number is shifted by one for display; presumably it
            # is 0-based at ingestion time -- confirm against the loader.
            'reference': int(record["metadata"]["page_number"]) + 1
        }
        for record in results['matches']
    ]

For the answer generation, let us now set the system instructions for the LLM model, which would be used to generate the answers.

In [105]:
# System prompt text for the answer-generating model.
# NOTE(review): nothing in the visible code passes this string to a model --
# presumably the assistant identified by KNOWLEDGE_BASE_OPENAI_ASSISTANT_ID was
# configured with it separately; confirm.
system_instructions = """
    You are a specialised AI document analyser working at an edtech startup, and you will be assisting the users to answer their queries. You will be given 
    the top relevant documents and you have to use those to answer the query asked by the user, which will be given to you below. 
    
    In the relevant documents,you will be given the cosine similarity score, the document name and the reference (which is the page number where this 
    text was in the document) and the text itself. You can in you answer integrate the document name and reference to build authenticity of your answer, 
    by precisely writing it like (reference page : page_num). You should eventually tell the user to explore more about the topic in the document and that 
    reference page. Infact, this would be highly favourable if you mention the document name and reference to build your authenticity
    
    MAKE SURE YOU DO NOT ANSWER FROM ANYTHING APART FROM THE DOCUMENTS GIVEN TO YOU. 
"""

Now, we would also need the query prompt created from a prompt template, by which we can dynamically populate the query prompt with the relevant data.

In [106]:
# Template for the per-request message: the user query followed by the
# retrieved document chunks. The \n escapes are real newlines because the
# string is not raw.
query_prompt_template = """
    \n\n User Query : {query}
    \n\n Documents : {documents}
"""

# PromptTemplate that fills the {query} and {documents} placeholders.
query_prompt = PromptTemplate(
    input_variables=["query","documents"],
    template= query_prompt_template
)

# Assistant API 
Now, we have an assistant ready to be used, which can go through the query and the relevant documents, and then generate the answers. We will be hitting the assistant with an API call, and then getting the answers back from the assistant.

In [107]:
# Client used for the Assistants API calls; this rebinds openAI_client to a new
# OpenAI client built from the same API key.
openAI_client = OpenAI(api_key=OPENAI_API_KEY)
# ID of the assistant to run, read from the environment (None if unset).
assistant_id = os.getenv("KNOWLEDGE_BASE_OPENAI_ASSISTANT_ID")

Now, we will be building the function to answer the queries from the assistant. It posts the prompt to a thread, starts a run on that thread, and polls until the run finishes, as we aim to fetch the answer from the assistant.

In [108]:
# One thread is created up front and reused by every fetch_answer call, so all
# prompts and replies are added to the same thread.
thread = openAI_client.beta.threads.create()

def fetch_answer(final_prompt, timeout=600.0, poll_interval=1.0):
    """
    Post a prompt to the shared assistant thread, run the assistant on it and
    return the text of the reply.

    Args:
    - final_prompt: The full prompt text (user query plus retrieved documents)
    - timeout: Maximum number of seconds to wait for the run to finish
    - poll_interval: Seconds to sleep between run status checks

    Returns:
    The text of the newest assistant message produced by this run

    Raises:
    - TimeoutError: If the run has not finished within `timeout` seconds
    - Exception: If the run ends with a status other than 'completed', or
      completes without producing any text content
    """
    openAI_client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=[{
            "type": "text",
            "text": final_prompt
        }]
    )

    run = openAI_client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant_id,
    )

    # Bound the total wait so an unexpected run state cannot hang the caller.
    deadline = time.monotonic() + timeout
    while True:
        run = openAI_client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id
        )
        if run.status == 'completed':
            break
        # 'incomplete' is terminal as well; without it the loop would poll
        # forever on a run that can never reach 'completed'.
        if run.status in ('failed', 'cancelled', 'expired', 'incomplete'):
            raise Exception(f"Run ended with status: {run.status}")
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Run did not finish within {timeout} seconds "
                f"(last status: {run.status})"
            )
        time.sleep(poll_interval)

    # Restrict the listing to messages generated by this run: the thread is
    # shared across calls, so it also holds earlier prompts and replies.
    messages = openAI_client.beta.threads.messages.list(
        thread_id=thread.id,
        run_id=run.id
    )
    # Messages are listed newest first; return the first text part found
    # rather than assuming the first content part is text.
    for message in messages.data:
        for part in message.content:
            if part.type == "text":
                return part.text.value

    raise Exception("Run completed but produced no text message")

# Agent
Now, we will have to make the tools for the agent and make it ready to be used. We will be using the agent to fetch the user query (ideally, this would be coming in from the supervisor), and then we will be using the agent to fetch the answers from the assistant (eventually to be sent back to the supervisor).

In [109]:
@tool
def answer_query(query: str) -> str:
    """
        Answer a user query from the knowledge base. Retrieves the most
        relevant document chunks for the query and returns an answer written
        from those documents. No user ID is required.
        
        Args:
        - query: The exact user query to answer
        
        Returns:
        Final text answer from the LLM, given the relevant documents.
    """
    # NOTE: the docstring above is what the @tool decorator exposes to the
    # agent as the tool description, so it describes the tool's actual job.
    query_vector = get_embedding(query)
    results = index.query(
        vector=query_vector,
        top_k=10,
        include_values=False,
        include_metadata=True,
        filter={
            # The knowledge base is searched under the fixed admin ID from the
            # environment, read at call time.
            "userID": os.getenv("ADMIN_ID")
        }
    )

    relevant_texts = [
        {
            'score': record['score'],
            'text': record['metadata']['chunk'],
            'name': record['metadata']['document_name'],
            # Stored page_number is shifted by one for display; presumably it
            # is 0-based at ingestion time -- confirm against the loader.
            'reference': int(record["metadata"]["page_number"]) + 1
        }
        for record in results['matches']
    ]

    # Render the query and retrieved chunks into one prompt string and hand it
    # to the assistant.
    final_prompt = query_prompt.invoke({"query" : query,"documents": relevant_texts}).text
    return fetch_answer(final_prompt)
    

Now that we have the tool ready, we will be making the prompt for the agent. This aims to clearly tell the assistant what has to be done and what the query is.

In [110]:
# Prompt for the agent: a fixed system instruction, the user's input, and a
# placeholder that receives the agent's intermediate tool-call messages.
main_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an assistant who can handle queries by using the tool you have and refer the user to correct material, you just have to pass the exact user query to the tool and it will do the rest, you DO NOT HAVE TO GIVE ANY USERID TO ANY TOOL, they have the id pre-defined...",
        ),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

Let us now initialise the LLM instance from langchain and then use the tools above and bind them to the agent, which would be used to fetch the answers.

In [111]:
# Chat model used by the agent pipeline (tools are bound to it separately).
llm = ChatOpenAI(
    api_key = OPENAI_API_KEY,
    model = "gpt-4o",
    temperature=0.2
)

In [112]:
# answer_query is the agent's only tool; bind_tools attaches it to the model.
tools = [answer_query]
llm_with_tools = llm.bind_tools(tools)

Now, this function creates an agent pipeline in a step-by-step flow, processing input data and returning a meaningful output. How the Pipeline Works Together : 
- Input Preparation: The user’s query (x["input"]) and intermediate steps are extracted and formatted.
- Main Prompt Application: The instructions for the agent (e.g., "You are a helpful assistant") are added to guide its behavior.
- Processing by LLM: The LLM processes the input and, if needed, interacts with tools to generate an appropriate response.
- Output Parsing: The raw response from the LLM is cleaned up for easy understanding by the user.

In [113]:
# Agent pipeline: map the incoming dict to the prompt variables, render the
# prompt, call the tool-bound model, then parse its output.
agent = (
    {
        # Pass the user's text through unchanged.
        "input": lambda x: x["input"],
        # Convert the intermediate steps into messages for the scratchpad
        # placeholder in main_prompt.
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | main_prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

With this, we can execute the function and see how the agent works.

In [114]:
# Wrap the agent and its tools in an executor and run a sample question;
# list(...) consumes the stream so every streamed item is collected.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
list(agent_executor.stream({"input": "what is meant by rolling up the state?"}))



[1m> Entering new None chain...[0m
[32;1m[1;3m
Invoking: `answer_query` with `{'query': 'what is meant by rolling up the state?'}`


[0m[36;1m[1;3m"Rolling up the state" refers to a technique in React where you manage state more efficiently by lifting it up to the least common ancestor (LCA) of components that need access to that state. Instead of duplicating the state across multiple components, you centralize it in a common ancestor, allowing that ancestor to manage the state and pass it down to its children as needed. This approach helps in reducing unoptimal re-renders and makes the state management more streamlined as your application grows (W9-react, reference page: 37).

For more details on this concept, you can explore the document "W9-react" on reference page 37.[0m[32;1m[1;3m"Rolling up the state" in React refers to managing state more efficiently by lifting it up to the least common ancestor (LCA) of components that need access to that state. Instead of duplicati

[{'actions': [ToolAgentAction(tool='answer_query', tool_input={'query': 'what is meant by rolling up the state?'}, log="\nInvoking: `answer_query` with `{'query': 'what is meant by rolling up the state?'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_LO5pigznnR2OX1i5ZWujkC3j', 'function': {'arguments': '{"query":"what is meant by rolling up the state?"}', 'name': 'answer_query'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_50cad350e4'}, id='run-2be70ff1-84c3-4ed7-af12-e5d1f8bffccf', tool_calls=[{'name': 'answer_query', 'args': {'query': 'what is meant by rolling up the state?'}, 'id': 'call_LO5pigznnR2OX1i5ZWujkC3j', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'answer_query', 'args': '{"query":"what is meant by rolling up the state?"}', 'id': 'call_LO5pigznnR2OX1i5ZWujkC3j', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id=

In [115]:
# Sample run with an informal question; a fresh executor is built for the cell
# and list(...) consumes the whole stream.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
list(agent_executor.stream({"input": "I am so confused in useRef bruh..."}))



[1m> Entering new None chain...[0m
[32;1m[1;3m
Invoking: `answer_query` with `{'query': 'useRef in React'}`


[0m[36;1m[1;3mIn React, `useRef` is a hook that allows you to create a reference to a value or a DOM element that persists across renders. One of the key characteristics of `useRef` is that it does not trigger a re-render when the value it holds changes. This makes it particularly useful for storing mutable values that you want to persist without causing additional renders (W9-react, reference page: 25).

A common use case for `useRef` is to access and manipulate DOM elements directly. For example, you can create a ref to an input element and use it to focus the input programmatically (W9-react, reference page: 26).

To explore more about `useRef` and see examples of its usage, you can refer to the document "W9-react" on reference pages 25 and 26.[0m[32;1m[1;3mIn React, `useRef` is a hook that allows you to create a reference to a value or a DOM element that persist

[{'actions': [ToolAgentAction(tool='answer_query', tool_input={'query': 'useRef in React'}, log="\nInvoking: `answer_query` with `{'query': 'useRef in React'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_8pAykBBbRK3ip0vP0lzSyFqh', 'function': {'arguments': '{"query":"useRef in React"}', 'name': 'answer_query'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_50cad350e4'}, id='run-112e8f95-dfe9-4c60-84a2-928e743e65f1', tool_calls=[{'name': 'answer_query', 'args': {'query': 'useRef in React'}, 'id': 'call_8pAykBBbRK3ip0vP0lzSyFqh', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'answer_query', 'args': '{"query":"useRef in React"}', 'id': 'call_8pAykBBbRK3ip0vP0lzSyFqh', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_8pAykBBbRK3ip0vP0lzSyFqh')],
  'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'in

In [116]:
# Sample run with a misspelled term in the question; a fresh executor is built
# for the cell and list(...) consumes the whole stream.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
list(agent_executor.stream({"input": "btw what exactly do we mean by prop drelling? Can you give me an example..."}))



[1m> Entering new None chain...[0m
[32;1m[1;3m
Invoking: `answer_query` with `{'query': 'prop drelling example'}`


[0m[36;1m[1;3mProp drilling in React occurs when you need to pass data from a higher-level component down to a lower-level component that is several layers deep in the component tree. This often involves passing props through many intermediate components that do not use the props themselves, just to get them to the component that needs them. This can lead to increased complexity and maintenance challenges (W9-react, reference page: 39).

For example, if you have a component hierarchy like this:

```
<App>
  <Parent>
    <Child>
      <Grandchild>
        <TargetComponent />
      </Grandchild>
    </Child>
  </Parent>
</App>
```

If `TargetComponent` needs a piece of data from `App`, you would have to pass that data down through `Parent`, `Child`, and `Grandchild`, even if those components do not need to use the data themselves. This is what is referred to as pro

[{'actions': [ToolAgentAction(tool='answer_query', tool_input={'query': 'prop drelling example'}, log="\nInvoking: `answer_query` with `{'query': 'prop drelling example'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_h0LzmeJF4jCIFYGftdctGnta', 'function': {'arguments': '{"query":"prop drelling example"}', 'name': 'answer_query'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_50cad350e4'}, id='run-1fb71265-6207-47f8-9aa6-d739d33202eb', tool_calls=[{'name': 'answer_query', 'args': {'query': 'prop drelling example'}, 'id': 'call_h0LzmeJF4jCIFYGftdctGnta', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'answer_query', 'args': '{"query":"prop drelling example"}', 'id': 'call_h0LzmeJF4jCIFYGftdctGnta', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_h0LzmeJF4jCIFYGftdctGnta')],
  'messages': [AIMessageChunk(content='', addition

With this, we now have an agent which will be answering the core question asked by the user, based on the knowledge base. We can replicate the same for Q&A knowledge base query, as that has a different type of dataset.