# Dependencies

In [None]:
from langchain_community.document_loaders import PyPDFLoader,TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from openai import OpenAI
import requests

from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from pinecone import Pinecone, ServerlessSpec
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate


from langchain.agents import initialize_agent, Tool
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from langchain.agents.format_scratchpad.openai_tools import (
    format_to_openai_tool_messages,
)
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain.agents import AgentExecutor

from langchain_core.tools import tool
from pydantic.v1 import BaseModel, Field
from openai import OpenAI

from pydantic import BaseModel, Field, ValidationError
from typing import List, Union, Dict, Optional
from pinecone import Index

import validators
import requests
import os
import uuid

from dotenv import load_dotenv
load_dotenv()

import os
import sys
import time

# Credentials and index names come from the environment (populated by
# load_dotenv() above); any variable that is not set resolves to None.
_env = os.environ

# OpenAI
OPENAI_API_KEY = _env.get("OPENAI_API_KEY")

# Pinecone
PINECONE_API_KEY = _env.get("PINECONE_API_KEY")
PINECONE_API_ENV = _env.get("PINECONE_API_ENV")
PINECONE_QA_BASE_INDEX_NAME = _env.get("PINECONE_QA_BASE_INDEX_NAME")
PINECONE_KNOWLEDGE_BASE_INDEX_NAME = _env.get("PINECONE_KNOWLEDGE_BASE_INDEX_NAME")

# Client Initialisation
We use the OpenAI client to generate the embeddings our model needs, and perform the other necessary initialisations (the Pinecone client and the Q&A index).

In [45]:
# OpenAI client; its `embeddings` resource is kept under a separate name
# because get_embedding() calls it directly.
openAI_client = OpenAI(api_key=OPENAI_API_KEY)
embedding_model = openAI_client.embeddings
# Pinecone client and a handle on the Q&A index that all queries below go to.
pc = Pinecone(api_key = PINECONE_API_KEY, environment = PINECONE_API_ENV)
index = pc.Index(PINECONE_QA_BASE_INDEX_NAME)

def get_embedding(text, model="text-embedding-3-small"):
    """
        Function to convert the text string into embeddings using an OpenAI embedding model

        Args:
            text : A string which will contain either the text chunk or the user query
            model : Name of the OpenAI embedding model to use. Defaults to
                "text-embedding-3-small".
                NOTE(review): presumably this must be the same model that was used
                to populate the Pinecone index - confirm before overriding.

        Returns:
            vector : The embedding of `text` (1536 dimensions for the default model)

        Raises:
            RuntimeError : If the embeddings request fails. The original error is
                chained as `__cause__`, so its type and traceback stay available.
    """

    try:
        response = embedding_model.create(
            input=text,
            model=model
        )
    except Exception as e:
        # Chain explicitly instead of flattening the error into a bare Exception.
        raise RuntimeError(f"Embedding request failed: {e}") from e

    # Only one input was sent, so the first entry is the embedding for `text`.
    return response.data[0].embedding
    

To use the above, we wrap these functions into a single tool that an agent can call to query the vector DB.

# Tool Creation
Now we create the tools for this purpose: functions that query the vector DB and eventually return the relevant texts (in this notebook they are written as plain synchronous functions rather than asynchronous ones). As the admin id is fixed, we read it directly from the environment.

In [46]:
def get_relevant_pairs(query):
    """
        Fetch the stored Q&A pairs closest to a query from the Pinecone index.

        Args:
            query : The user's question as plain text

        Returns:
            A list of dicts with the keys 'id', 'score', 'question' and 'answer',
            one per match, in the order the index returned them (at most 10).
    """
    embedded_query = get_embedding(query)

    # Restrict the search to records owned by the configured admin.
    admin_filter = {"ADMIN_ID": os.getenv('ADMIN_ID_QA')}
    response = index.query(
        vector=embedded_query,
        top_k=10,
        include_values=False,
        include_metadata=True,
        filter=admin_filter,
    )

    # Flatten each match into the fields the answering prompt needs.
    return [
        {
            'id': match['metadata']['QA_ID'],
            'score': match['score'],
            'question': match['metadata']['question'],
            'answer': match['metadata']['answer'],
        }
        for match in response['matches']
    ]

For answer generation, let us now set the system instructions for the LLM that will generate the answers.

In [47]:
# System prompt describing how the Q&A assistant must answer: only from the
# supplied documents, and always citing the id of the referenced conversation.
# NOTE(review): nothing else in this section references this variable -
# presumably it mirrors the instructions configured on the OpenAI assistant; confirm.
system_instructions = """
    You are a specialised AI context aware doubt solver working at an edtech startup, and you will be assisting the users to answer their queries based on previous intrcutors and TAs solved queries.
    You will be given a query by the user and the top relevant documents and you have to use those to answer the query asked by the user, which will be given to you below. 
    In the relevant documents,you will be given the id of the conversation, the cosine similarity score, the question which was aksed by previous student and the answers by the TAs, along with the id of the QA pair. 
    YOU MUST tell the user that they can explore this further by going to that thread (give them the id) and looking at the entire conversation for better understanding. (Think of this as a reference to build authenticity, as you mention the id).
    
    MAKE SURE YOU DO NOT ANSWER FROM ANYTHING APART FROM THE DOCUMENTS GIVEN TO YOU. 
"""

We also need a query prompt built from a prompt template, so that it can be populated dynamically with the user query and the relevant documents.

In [48]:
# Text of the per-request message: the user's question followed by the
# retrieved Q&A pairs.
query_prompt_template = """
    \n\n User Query : {query}
    \n\n Documents : {documents}
"""

# Both placeholders must be supplied whenever the prompt is rendered.
query_prompt = PromptTemplate(
    template=query_prompt_template,
    input_variables=["query", "documents"],
)

# Assistant API 
Now we have an assistant ready to use: it goes through the query and the relevant documents and then generates the answer. We will call the assistant through the API and read the answer from its response.

In [49]:
# Rebinds openAI_client to a fresh client built from the same API key;
# embedding_model keeps pointing at the client created in the earlier cell.
openAI_client = OpenAI(api_key=OPENAI_API_KEY)
# Id of the pre-configured assistant; None if QA_OPENAI_ASSISTANT_ID is not set.
assistant_id = os.getenv("QA_OPENAI_ASSISTANT_ID")

Now we build the function that answers queries through the assistant: it posts the prompt to a thread, starts a run, and polls until the assistant's answer is available.

In [50]:
# One thread is created up front and reused by every fetch_answer() call, so
# messages from earlier calls stay on it.
thread = openAI_client.beta.threads.create()


def fetch_answer(final_prompt, timeout: Optional[float] = None, poll_interval: float = 1.0):
    """
        Send a prompt to the assistant on the shared thread and return its reply.

        Args:
            final_prompt : The fully rendered prompt text (user query + documents)
            timeout : Maximum number of seconds to wait for the run to finish.
                None (the default) waits until the run reaches a terminal state.
            poll_interval : Seconds to sleep between status checks (default 1.0)

        Returns:
            The text of the first content part of the newest message on the thread.

        Raises:
            Exception : If the run ends as 'failed', 'cancelled', 'expired' or
                'incomplete', or stops in 'requires_action'.
            TimeoutError : If `timeout` elapses before the run completes.
    """
    openAI_client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=[{
            "type": "text",
            "text": final_prompt
        }]
    )

    run = openAI_client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant_id,
    )

    # 'incomplete' is terminal just like the other three; without it the loop
    # below would poll forever.
    ended_statuses = ('failed', 'cancelled', 'expired', 'incomplete')
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        run = openAI_client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id
        )
        if run.status == 'completed':
            messages = openAI_client.beta.threads.messages.list(
                thread_id=thread.id
            )
            # Assumes the default listing order (newest first), so index 0 is the
            # assistant's reply to the message just posted.
            return messages.data[0].content[0].text.value
        if run.status in ended_statuses:
            raise Exception(f"Run ended with status: {run.status}")
        if run.status == 'requires_action':
            # This function never submits tool outputs, so the run cannot progress.
            raise Exception(
                "Run stopped with status: requires_action "
                "(tool outputs are not supported by fetch_answer)"
            )
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Run did not complete within {timeout} seconds "
                f"(last status: {run.status})"
            )
        time.sleep(poll_interval)

# Agent
Now we build the tool for the agent and get the agent ready for use. The agent receives the user query (ideally, this would come in from the supervisor) and uses the tool to fetch the answer from the assistant (eventually to be sent back to the supervisor).

In [None]:
@tool
def answer_query(query: str) -> str:
    """
        Find relevant documents for a given query and answer the query from them.
        
        Args:
        - query: The search query
        
        Returns:
        Final text answer from the LLM, given the relevant documents.
    """
    # NOTE: the docstring above is what the @tool decorator exposes to the LLM as
    # the tool description, so it only mentions arguments that actually exist.

    # Reuse the shared retrieval helper rather than repeating the Pinecone query
    # and result flattening here.
    relevant_pairs = get_relevant_pairs(query)

    final_prompt = query_prompt.invoke({"query" : query,"documents": relevant_pairs}).text
    return fetch_answer(final_prompt)
    

Now that the tool is ready, we write the prompt for the agent. It tells the assistant clearly what has to be done and what the query is.

In [52]:
# Chat prompt for the agent: a fixed system instruction, the user's input, and
# a placeholder that is filled with the agent's scratchpad messages.
main_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an assistant who can handle queries by using the tool you have and refer the user to correct question answer pairs, you just have to pass the exact user query to the tool and it will do the rest, and just give them the answer as it has come from the tool...",
        ),
        # "{input}" is supplied under the "input" key when the agent is invoked.
        ("user", "{input}"),
        # Filled from the "agent_scratchpad" key built in the agent pipeline.
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

Let us now initialise the LangChain LLM instance and bind the tool above to it; the agent will use this tool-enabled LLM to fetch the answers.

In [53]:
# Chat model that drives the agent: gpt-4o at a low sampling temperature (0.2).
llm = ChatOpenAI(model="gpt-4o", temperature=0.2, api_key=OPENAI_API_KEY)

In [54]:
# The agent has a single tool; binding it lets the model request calls to it.
tools = [answer_query]
llm_with_tools = llm.bind_tools(tools)

The code below builds the agent pipeline as a step-by-step flow that processes the input data and returns a meaningful output. How the pipeline works together:
- Input Preparation: The user’s query (x["input"]) and intermediate steps are extracted and formatted.
- Main Prompt Application: The instructions for the agent (e.g., "You are a helpful assistant") are added to guide its behavior.
- Processing by LLM: The LLM processes the input and, if needed, interacts with tools to generate an appropriate response.
- Output Parsing: The raw response from the LLM is cleaned up for easy understanding by the user.

In [55]:
# Agent pipeline: map the incoming dict to the prompt variables, render
# main_prompt, call the tool-bound LLM, then parse the LLM output with
# OpenAIToolsAgentOutputParser.
agent = (
    {
        # Pass the user's question through unchanged.
        "input": lambda x: x["input"],
        # Convert the intermediate steps recorded so far into messages for the
        # "agent_scratchpad" placeholder of main_prompt.
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | main_prompt
    | llm_with_tools
    | OpenAIToolsAgentOutputParser()
)

With this, we can run the agent through an executor and see how it works.

In [56]:
# Wrap the agent and its tool in an executor; verbose=True prints the trace
# shown in the cell output below.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
# list(...) drains the stream so every step runs; the collected chunks are the
# cell's displayed result.
list(agent_executor.stream({"input": "I was having issues in my NEXTJS project, as I tried to animate the dropdown menu, I got a hydration error."}))



[1m> Entering new None chain...[0m
[32;1m[1;3m
Invoking: `answer_query` with `{'query': 'I was having issues in my NEXTJS project, as I tried to animate the dropdown menu, I got a hydration error.'}`


[0m[36;1m[1;3mTo address the hydration error you are experiencing in your Next.js project while trying to animate a dropdown menu, you can follow these steps:

1. **Restart the Server**: Sometimes, simply restarting the server can resolve hydration issues.
   
2. **Inspect Styles**: Use the browser's inspect element tool to check if Tailwind CSS is being loaded correctly. If the styles are visible in the inspector but not applied, it might indicate a configuration issue with Tailwind CSS.

3. **Verify Tailwind Configuration**: Ensure that your Tailwind CSS setup is correct. This involves checking your Tailwind configuration file to make sure the necessary classes are being generated and applied.

For more detailed guidance, you can refer to the conversation with the ID `9cef8bde

[{'actions': [ToolAgentAction(tool='answer_query', tool_input={'query': 'I was having issues in my NEXTJS project, as I tried to animate the dropdown menu, I got a hydration error.'}, log="\nInvoking: `answer_query` with `{'query': 'I was having issues in my NEXTJS project, as I tried to animate the dropdown menu, I got a hydration error.'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_G4PYX3mACnDphE6SZ2CebMip', 'function': {'arguments': '{"query":"I was having issues in my NEXTJS project, as I tried to animate the dropdown menu, I got a hydration error."}', 'name': 'answer_query'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_50cad350e4'}, id='run-aa1bae58-c543-40b1-9f87-a5fe7f298a0f', tool_calls=[{'name': 'answer_query', 'args': {'query': 'I was having issues in my NEXTJS project, as I tried to animate the dropdown menu, I got a hydrat

In [57]:
# Second demo run with a different question; the executor is rebuilt from the
# same agent and tools before streaming.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
# list(...) drains the stream so every step runs; the collected chunks are the
# cell's displayed result.
list(agent_executor.stream({"input": "I am so confused about how JWT works..."}))



[1m> Entering new None chain...[0m
[32;1m[1;3m
Invoking: `answer_query` with `{'query': 'how JWT works'}`


[0m[36;1m[1;3mA JWT (JSON Web Token) works by taking a string and converting it into a token that can be used for authentication. The key plays a crucial role in signing and verifying the token, ensuring that it hasn't been tampered with. It does not encrypt or decrypt the token but is used to sign the token on the server side. The JWT consists of three parts: the payload, the secret key, and the expiry time. The secret key is essential for the server to verify the authenticity of the token sent by the client. If the token is valid, the server allows the client to proceed with further activities.

For more detailed information, you can refer to the conversation with the ID `2cef8bde-d6a7-4758-a736-7d2097c0b38a`. Exploring that thread might provide additional insights into how JWTs work and their implementation.[0m[32;1m[1;3mA JWT (JSON Web Token) works by taking a str

[{'actions': [ToolAgentAction(tool='answer_query', tool_input={'query': 'how JWT works'}, log="\nInvoking: `answer_query` with `{'query': 'how JWT works'}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1n7fzb8WW0plzZh00iIfyOeM', 'function': {'arguments': '{"query":"how JWT works"}', 'name': 'answer_query'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_50cad350e4'}, id='run-a8d637e7-115f-42b4-a82f-2ef10c601d6f', tool_calls=[{'name': 'answer_query', 'args': {'query': 'how JWT works'}, 'id': 'call_1n7fzb8WW0plzZh00iIfyOeM', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'answer_query', 'args': '{"query":"how JWT works"}', 'id': 'call_1n7fzb8WW0plzZh00iIfyOeM', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_1n7fzb8WW0plzZh00iIfyOeM')],
  'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, '

With this, we now have an agent that answers the core question asked by the user, based on the Q&A knowledge base. We can replicate the same approach for the text knowledge base query, which uses a different type of dataset.