# Dependencies

In [4]:
from langchain.document_loaders import PyPDFDirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from pinecone import Pinecone, ServerlessSpec
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate 
import pandas as pd
import json
import markdown
from dotenv import load_dotenv
load_dotenv()
import os
import time

In [5]:
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_API_ENV = os.getenv("PINECONE_API_ENV")

# Loading the Data 
Now, each conversation is stored as a chat history json (assumig similar structure from Discord API), where each key is the name of the user and the value is the message sent by user, and this flow is maintained in the conversation. Now, our aim is to load these into the format we want to store as a vector. For now, let us store the conversation as a list of jsons, where each json is a chat history of a conversation.

In [6]:
conversations = [
    {
    "Dalvi OP": "i am currently doing the Excalidraw Project i was watching the second video i created all the files and followed all the steps but when i am executing the code by pnpm run dev i am getting this error i am stuck in this error for 3 hours if someone can please help me solve this error it would be very helpful for me",
    "TA Tarun": "what is the error?",
    "Dalvi OP (later)": "??",
    "TA Tarun (later)": "are you using esbuild?",
    "Dalvi OP (response)": "I don't think so where should I check if I am using it or no",
    "TA Harry Potter": "You need to configure it inside the http-backend app",
    "TA Tarun (later)": "package.json of http-server",
    "TA Sumana": "use latest version of node",
    "Dalvi OP (response final)": "@TA Sumana @TA Tarun @TA Harry Potter thankyou all of you the issue is resolved :perfect:"
  },
  {
    "Samim Rezvi OP": "how does a jwt work...like it takes a string and converts it into something random...what is the role of key here? @TA Harry Potter @TA Sumana @TA SerBarristan",
    "TA Harry Potter": "google kiya?",
    "Samim Rezvi OP (response)": "yes key ka role samajh nhi aya",
    "TA Harry Potter (clarification)": "it's that, allows verification that the token wasn't tampered with.",
    "Samim Rezvi OP (clarification request)": "mtlb key encrypt and decrypt krta hai na?",
    "TA Harry Potter (clarification final)": "Nahi, JWT ke case mein key encrypt aur decrypt nahi karti, balki sign aur verify karne ke liye use hoti hai.",
    "akshiiittt": "Basically jwt authentication mein kaam aati hai, like vo ek random kuch text generate karta hai jisko ham sign ki madad se create karte hai jismein ham 3 values pass on krte, first one is the payload second is the secret key(jo sabse main hai jiski help se hi server verfiy kar pata hai ki ki jwt jo genrate hua hai voh sahi hai ya galat and third pe expiry time of that token). This is basically the main thing of the jwt and second verify hota hai jo client se send hota hai and server compare karta hai secret ki help se yeh sahi token hai ke nahi and then hi allow karta hai client ko aggey kuch bhi activities karne ke liye.",
    "Samim Rezvi OP (final)": "accha...samajh gya.... thanks man"
  },
  {
    "Shubha Sarkar OP": "I am struggling a lot when building the second brain app using Recoil. Fetch all data when the /dashboard component loads from backend show in UI. There are 3 endpoints in backend \"/create\" \"/edit:id\" \"/delete:id\". The UI should update immediately and also update in backend when user performs these operations.",
    "TA Harry Potter": "Screen recording of the issue?",
    "Shubha Sarkar OP (response)": "https://drive.google.com/drive/folders/1e1n6mXUls2DD6yxnPANFfnEMKiJgeznd?usp=sharing",
    "TA Harry Potter (later)": "I'll check. Outside my home rn",
    "TA Harry Potter (recommendation)": "You should use an atom for state storage and combine it with a useEffect to fetch initial data on /dashboard load. It ensures updates reflect instantly by modifying the atom after every operation."
  },
  {
    "TANAY": "I am creating a project in NextJs. It was running fine but suddenly an error occurred while trying to animate the dropdown menu. Then I don't know why hydration error occurred. An error in global.css happened. When I undid the changes and re-ran the server, the error was gone but my styles were also gone.",
    "TA Jack Sparrow": "Restart the server and try again. Also inspect the styles like if Tailwind is even working or not.",
    "TANAY (response)": "It is showing but not applied.",
    "TA Jack Sparrow (clarification)": "Check the style in inspect elements.",
    "TANAY (clarification request)": "Also now it is showing hydration error.",
    "TA Jack Sparrow (suggestion)": "Check Tailwind docs to set it up again."
  },
  {
    "Aniket": "Auth.tsx\nimport { useRef } from \"react\";\nimport { Button } from \"../components/Button\";\nimport { Input } from \"../components/Input\";\nimport { BACKEND_URL } from \"../../config\";\nimport axios from \"axios\";\n\nexport const Signup = () => {\n const usernameRef = useRef<HTMLInputElement>();\n const passwordRef = useRef<HTMLInputElement>();\n\n const handleSignup = async () => {\n try {\n const username = usernameRef.current?.value;\n // console.log(username)\n const password = passwordRef.current?.value;\n // console.log(password)\n\n await axios.post(BACKEND_URL + \"/api/v1/signup\", {\n username,\n password\n });\n } catch (error) {\n console.log(error);\n }\n alert(\"User signed up!\");\n };\n\n return (\n <div className=\"flex justify-center items-center h-screen w-screen bg-gray-200\">\n <div className=\"bg-white rounded-md min-w-48 flex items-center justify-center flex-col p-8\">\n <Input reference={usernameRef} type=\"text\" placeholder=\"Username\" />\n <Input reference={passwordRef} type=\"password\" placeholder=\"Password\" />\n <div className=\"w-full mt-4\">\n <Button\n variant=\"primary\"\n size=\"md\"\n text=\"Signup\"\n fullWidth={true}\n onClick={handleSignup}\n />\n </div>\n </div>\n </div>\n );\n};\n\nexport const Signin = () => {\n return (\n <div className=\"flex justify-center items-center h-screen w-screen bg-gray-200\">\n <div className=\"bg-white rounded-md min-w-48 flex items-center justify-center flex-col p-8\">\n <Input type=\"text\" placeholder=\"Username\" />\n <Input type=\"password\" placeholder=\"Password\" />\n <div className=\"w-full mt-4\">\n <Button variant=\"primary\" size=\"md\" text=\"Signin\" fullWidth={true} />\n </div>\n </div>\n </div>\n );\n};",
    "Aniket (mention)": "@TA (WebDev and Devops)",
    "TA Harry Potter": "did you test the api in postman?",
    "TA Tarun": "add origin: * inside cors()",
    "Aniket (response)": "Yes",
    "Aniket (try)": "Ok let me try",
    "TA Harry Potter (advice)": "then enable it for the FE origin\n\napp.use(cors({\n origin: process.env.FRONTEND_URL || \"http://localhost:5173\",\n methods: ['GET', 'POST', 'PUT', 'DELETE'],\n credentials: true\n}));\nif you wanna enable for all origin",
    "TA Harry Potter (repeat)": "tarun add origin: ",
    "Aniket": "Updated the code still doesn't work",
    "TA Harry Potter": "app.use(cors(\"\")) this?",
    "Aniket": "you mean app.use(cors({\n origin: \"*\",\n})) ?",
    "TA Harry Potter (confirmation)": "yeah",
    "Aniket": "No",
    "TA Harry Potter (question)": "did you rebuild the server?",
    "Aniket": "Yes I did",
    "TA Harry Potter (suggestion)": "try logging the inputs in server routes and see if you are getting right or not",
    "Crytek": "reinstall the cors and re-import the cors module",
    "Aniket (resolved)": "The issue is resolved now ðŸŽ‰. I noticed that whenever I add \"return\" statement it gave me this error. When I removed the return statement, it worked. Can anyone explain this why"
  },

]

# OpenAI Assistant Inferencing
Now, we have designed an assistant in openAI which will assist to summarise these chats and get them as valid json Q&A pairs. Below, we can see the process of inferencing the assistant and fetching the output.

In [None]:
openAI_client = OpenAI(api_key=OPENAI_API_KEY)
assistant_id = os.getenv["QA_SUMMARISER_OPENAI_ASSISTANT_ID"]

Now, we have to create a thread. A thread here represents a conversation between a user and an assistant. Along with that, we should also sequentially add the messages from the conversation list to the thread. So here, the catch is that we will have to initiate a thread, add a conversation as the first message from the user, and then run the thread with the assistant. The assistant will then respond to the user's message and will add it's response as the next message in the thread. 

Since this has to be done sequentially, we will have to run the assistant for each conversation in the list and develop the functionalities in a modular way to be used later. 

In [8]:
thread = openAI_client.beta.threads.create()

def create_QAPair(conversation):
    openAI_client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content= [{
            "type" : "text",
            "text" : json.dumps(conversation)
        }]
    )
            
    run = openAI_client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant_id,
    )
    
    return run

def get_QAPair(thread, run):
    while True:
        run = openAI_client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id
        )
        if run.status == 'completed':
            messages = openAI_client.beta.threads.messages.list(
                thread_id=thread.id
            )
            return messages
        elif run.status in ['failed', 'cancelled', 'expired']:
            raise Exception(f"Run ended with status: {run.status}")
        time.sleep(1)
         

Now, let us run the thread individually for each conversation, get the output and append it to the list of outputs.

In [9]:
def get_QAs(conversations):
    QAs = []
    for conversation in conversations : 
        run = create_QAPair(conversation)
        messages = get_QAPair(thread, run)
        if(messages):
            QAs.append(messages.data[0].content[0].text.value)
        else:
            QAs.append({"question" : "Not available", "answer" : "Not available"})
    return QAs

In [10]:
QAs_PreID = get_QAs(conversations)

In [11]:
QAs_PreID

['{"question":"I am getting an error while executing the code for the Excalidraw Project using \'pnpm run dev\'. Can someone help me solve this error?","answer":"The issue was resolved by checking the configuration of the http-server in the package.json file and ensuring the use of the latest version of Node.js. The teaching assistants suggested configuring the http-backend app and updating Node.js, which helped in resolving the error."}',
 '{"question":"How does a JWT work, and what is the role of the key in it?","answer":"A JWT (JSON Web Token) works by taking a string and converting it into a token that can be used for authentication. The key plays a crucial role in signing and verifying the token, ensuring that it hasn\'t been tampered with. It does not encrypt or decrypt the token but is used to sign the token on the server side. The JWT consists of three parts: the payload, the secret key, and the expiry time. The secret key is essential for the server to verify the authenticity 

Now, let us mark each QA with a QA id for later purposes, for this we will use dummy QA id.

In [12]:
QA_ID = ["2cef8bde-c6a7-4758-a736-7d2097c0b38a","2cef8bde-d6a7-4758-a736-7d2097c0b38a","2cef8bde-c6a7-4758-b736-7d2097c0b38a","9cef8bde-c6a7-4758-a736-7d2097c0b38a","2cef8bde-c6a7-4758-a836-7d2097c0b38a"]

QAs = {}
id = 0
for QA in QAs_PreID:
    QAs[QA_ID[id]] = QA
    
    id += 1

QAs

{'2cef8bde-c6a7-4758-a736-7d2097c0b38a': '{"question":"I am getting an error while executing the code for the Excalidraw Project using \'pnpm run dev\'. Can someone help me solve this error?","answer":"The issue was resolved by checking the configuration of the http-server in the package.json file and ensuring the use of the latest version of Node.js. The teaching assistants suggested configuring the http-backend app and updating Node.js, which helped in resolving the error."}',
 '2cef8bde-d6a7-4758-a736-7d2097c0b38a': '{"question":"How does a JWT work, and what is the role of the key in it?","answer":"A JWT (JSON Web Token) works by taking a string and converting it into a token that can be used for authentication. The key plays a crucial role in signing and verifying the token, ensuring that it hasn\'t been tampered with. It does not encrypt or decrypt the token but is used to sign the token on the server side. The JWT consists of three parts: the payload, the secret key, and the exp

# Pinecone Initialization
Now, we will be using the pinecone vectorDB to store the embeddings of the QA Pairs. We will be using the `pinecone.init()` function to initialize the pinecone environment. We will be using the `pinecone.use_index()` function to use the index created for this project and setup the instance for the same.

In [13]:
pc = Pinecone(api_key = PINECONE_API_KEY, environment = PINECONE_API_ENV)

Now, let us view the indexs avaliable in the pinecone environment.

In [14]:
pc.list_indexes()

{'indexes': [{'deletion_protection': 'disabled',
              'dimension': 1536,
              'host': 'documents-hjunc2h.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'documents',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'},
              'tags': {'embedding_model': 'text-embedding-3-small'},
              'vector_type': 'dense'},
             {'deletion_protection': 'disabled',
              'dimension': 1536,
              'host': 'qa-pairs-hjunc2h.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'qa-pairs',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'},
              'tags': {'embedding_model': 'text-embedding-3-small'},
              'vector_type': 'dense'}]}

So here, we will be using the index named `qa-pairs` to store the embeddings of the QA Pairs.

# Embedding the Chunks using OpenAI text-embedding-3-small
Here, we will be using the OpenAI text-embedding-3-small model to embed the chunks, for which we will need an openAI instance initialised.

In [15]:
openAI_embedding_client = OpenAI(api_key=OPENAI_API_KEY)

Let us go ahead and set the embeddings model and a function to get the embeddings of any given text via the text-embedding-3-small model.

In [16]:
embedding_model = openAI_client.embeddings

def get_embedding(text) :
    response = embedding_model.create(
        input=text,
        model="text-embedding-3-small"
    )
    return response.data[0].embedding

# Vectorization
Now that we have the Q&A pairs, we can store them as vectors in pinecone and use them for similarity search. We can use the `pinecone` library to store the vectors in the pinecone database.

Now, each Q&A pair will belong to a course, where we will be having the : 
 
- Admin ID
- Course ID
- Topic ID
- Q&A Pair ID


Now, the ID for each vector will be in the format `admin-id_course-id_topic-id_QA-id` so that each pair can be searched and retrieved easily. Although searching is done via metadata filters, we create this uniuqe ID for each vector for easy deletion and retrieval.

Also, each pair will be stored in database.To simiplify the process, each entry in the vectorDB should have : 

* **ID** : The unique ID of the document, which will be a combination of the admin id, course id, topic id and QA id.
* **VALUES** : The embedding of the question and answer pair, as generated by the OpenAI text-embedding-3-small model.
* **METADATA** : The metadata of the document, which will include as follows : 
    *  **ADMIN_ID** : The ID of the admin in our system.
    *  **COURSE_ID** : The ID of the course in our system.
    *  **TOPIC_ID** : The ID of the topic in our system (pre-defined by Admin)
    *  **QA_ID** : The ID of the QA conversation in our system.
    *  **QUESTION** : The question in the Q&A pair.
    *  **ANSWER** : The answer in the Q&A pair.

In [17]:
ADMIN_ID = "d359d72b-40e8-4e9f-b567-62d77f273113"
COURSE_ID = "d0305607-30cf-4dfb-ba84-5dc407d9d5bf"
TOPIC_ID = "e766a834-d346-4265-8e5e-78e0832cabf0"

Let us now create the function to create vectors in our desired format as defined above.

In [18]:
def create_vectors(QAs,ADMIN_ID,COURSE_ID,TOPIC_ID):
    vectors = []
    
    for QA in QAs: 
        entry = {}
        entry["id"] = f"{ADMIN_ID}_{COURSE_ID}_{TOPIC_ID}_{QA}"
        
        question = json.loads(QAs[QA])["question"]
        answer = json.loads(QAs[QA])["answer"]
        text = "Question : " + question + "\nAnswer : " + answer
        
        entry["values"] = get_embedding(text)
        entry["metadata"] = {
            "ADMIN_ID" : ADMIN_ID,
            "COURSE_ID" : COURSE_ID,
            "TOPIC_ID" : TOPIC_ID,
            "QA_ID" : QA,
            "question" : question,
            "answer" : answer
        }
        
        vectors.append(entry)
        
    return vectors

In [19]:
vectors = create_vectors(QAs,ADMIN_ID,COURSE_ID,TOPIC_ID)

With this, we have our vectors stored in the ideal format to be pushed into the vector DB. Let us now push the vectors into the vectorDB of pinecone.

# Pushing the Vectors into the Pinecone Index

In [20]:
index_name = "qa-pairs"
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

index = pc.Index(index_name)

index.upsert(
    vectors=vectors
)

{'upserted_count': 5}

# Querying the Vectors
We shall now query the vectors to check if the vectors have been stored correctly in the pinecone index, and how does this exactly work. We will fetch the relevant vectors from the pinecone index. For that, we will create a function which takes a text query, converts into to an embedding and queries the pinecone index to get the most similar texts from the vectors stored in the index.

In [21]:
def get_relevant_pairs(query,ADMIN_ID,COURSE_ID,TOPIC_ID):
    query_vector = get_embedding(query)
    
    results = index.query(
        vector = query_vector,
        top_k = 10,
        include_values = False,
        include_metadata = True,
        filter={
            "ADMIN_ID" : ADMIN_ID,
            "COURSE_ID" : COURSE_ID,
            "TOPIC_ID" : TOPIC_ID,
        }
    )
    
    relevant_pairs = []
    for record in results['matches']:
        pair = {}
        pair['id'] = record['metadata']['QA_ID']
        pair['score'] = record['score']
        pair['question'] = record['metadata']['question']
        pair['answer'] = record['metadata']['answer']
        relevant_pairs.append(pair)
    
    return relevant_pairs

Finally, we can create a QA system which will take a query and return the most relevant chunks from the PDF document.

In [22]:
import sys 
while True:
    user_input = input(f"Input Prompt: ")
    if user_input=='exit':
        print( 'Exiting')
        sys.exit()
    if user_input == '':
        continue
    
    docs = get_relevant_pairs(user_input,ADMIN_ID,COURSE_ID,TOPIC_ID)
        
    for doc in docs:
        print(f"Rank {doc['score']} \n Question {doc['question']} \n Answer: \n {doc['answer']}")
        print("------------------------")

    print("------------------------------------------------------------------------------------------------------------------------")
        

Rank 0.532283187 
 Question How does a JWT work, and what is the role of the key in it? 
 Answer: 
 A JWT (JSON Web Token) works by taking a string and converting it into a token that can be used for authentication. The key plays a crucial role in signing and verifying the token, ensuring it hasn't been tampered with. The process involves creating a token with three parts: the payload, the secret key, and the expiry time. The secret key is essential for the server to verify the authenticity of the token sent by the client. It does not encrypt or decrypt the token but is used to sign and verify it, allowing the server to confirm the token's validity before permitting any client activities.
------------------------
Rank 0.214643821 
 Question I was facing an issue with CORS in my application, and it was resolved when I removed the 'return' statement. Can anyone explain why this happened? 
 Answer: 
 The issue with CORS was resolved by removing the 'return' statement because it might have

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


With this, our pipeline is complete and we can now move on to the next steps which is sending these relvant documents to the LLM to answer our query.

# Prompt Template for the LLM
Here, we will need to define the prompt for the LLM to answer the query. The LLM will be given the query and the relevant documents, and it will be expected to return the answer to the query. 

In [23]:
query_prompt_template = """
    You are a specialised AI context aware doubt solver working at an edtech startup, and you will be assisting the users to answer their queries based on previous intrcutors and TAs solved queries.
    You will be given a query by the user and the top relevant documents and you have to use those to answer the query asked by the user, which will be given to you below. 
    In the relevant documents,you will be given the id of the conversation, the cosine similarity score, the question which was aksed by previous student and the answers by the TAs, along with the id of the QA pair. 
    YOU MUST tell the user that they can explore this further by going to that thread (give them the id) and looking at the entire conversation for better understanding. (Think of this as a reference to build authenticity, as you mention the id).
    
    \n\n User Query : {query}
    \n\n Documents : {documents}
    
    MAKE SURE YOU DO NOT ANSWER FROM ANYTHING APART FROM THE DOCUMENTS GIVEN TO YOU. 
"""

In [24]:
query_prompt = PromptTemplate(
    input_variables=["query","documents"],
    template=query_prompt_template
)

# Initializing the LLM Client and Chain for RAG Model

In [25]:
chat = ChatOpenAI(
    temperature = 0,  
    model = "gpt-4o",
    openai_api_key = OPENAI_API_KEY
)

In [26]:
query_chain = LLMChain(
    llm=chat,
    prompt=query_prompt
)

# Q&A System using the chain

In [30]:
user_query = "I was having issues in my NEXTJS project, as I tried to animate the dropdown menu, I got a hydration error."
docs = get_relevant_pairs(user_query,ADMIN_ID,COURSE_ID,TOPIC_ID)

# Run the chain
response = query_chain.invoke({
    "query": user_query,
    "documents": docs
})

# Print the response
print("Response from LLM:")
print(response['text'])

Response from LLM:
To address the hydration error you're experiencing in your Next.js project while trying to animate a dropdown menu, you can follow these steps based on a similar issue resolved previously:

1. **Restart the Server**: Sometimes, simply restarting the server can resolve hydration errors.

2. **Inspect Styles**: Use the browser's inspect element tool to check if Tailwind CSS is being loaded correctly. If the styles are visible but not applied, it might indicate a configuration issue with Tailwind CSS.

3. **Verify Tailwind Configuration**: Ensure that your Tailwind CSS is set up correctly in your project. This involves checking your Tailwind configuration file to make sure the necessary classes are being generated and applied.

For a more detailed understanding, you can explore the entire conversation related to this issue by visiting the thread with the ID: 9cef8bde-c6a7-4758-a736-7d2097c0b38a. This will provide you with additional context and insights that might be he

Here, we have successfully built the RAG model and the Q&A system using the chain. With this, we get the functionality to query the relevant documents and get the answer to the query.

In [31]:
user_query = "I am so confused about how JWT works..."
docs = get_relevant_pairs(user_query,ADMIN_ID,COURSE_ID,TOPIC_ID)

# Run the chain
response = query_chain.invoke({
    "query": user_query,
    "documents": docs
})

# Print the response
print("Response from LLM:")
print(response['text'])

Response from LLM:
A JWT (JSON Web Token) works by taking a string and converting it into a token that can be used for authentication. The key plays a crucial role in signing and verifying the token, ensuring that it hasn't been tampered with. It does not encrypt or decrypt the token but is used to sign the token on the server side. The JWT consists of three parts: the payload, the secret key, and the expiry time. The secret key is essential for the server to verify the authenticity of the token sent by the client. If the token is valid, the server allows the client to proceed with further activities.

For a more detailed understanding, you can explore this further by looking at the entire conversation in the thread with the ID: 2cef8bde-d6a7-4758-a736-7d2097c0b38a.


# Deleting via Pinecone (Listing indexes with prefix and then deleting the index)

In [None]:
index_name = "qa-pairs"


index = pc.Index(index_name)
list1 = []
for ids in index.list(prefix=f"{ADMIN_ID}_{COURSE_ID}_{TOPIC_ID}_"):
  list1.append(ids)

In [None]:
KEY = "d359d72b-40e8-4e9f-b567-62d77f273113_d0305607-30cf-4dfb-ba84-5dc407d9d5bf_e766a834-d346-4265-8e5e-78e0832cabf0_2cef8bde-c6a7-4758-b736-7d2097c0b38a"
res = index.delete([ids for ids in index.list(prefix=KEY)])
res

{}