In [1]:
from langchain_community.document_loaders import PyPDFLoader

# Path to the source PDF, relative to the notebook's working directory.
path = "Docs/plan_and_solve.pdf"

# Load the PDF into `docs`; later cells split these documents and use
# len(docs) as the page count stored in each chunk's metadata.
loader = PyPDFLoader(path)
docs = loader.load()

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# chunk_size is measured with `len`, i.e. in characters, and chunks do not overlap.
# NOTE(review): 25000 characters is large; presumably most loaded documents pass
# through unsplit - confirm that this is the intended chunking granularity.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=25000,
    chunk_overlap=0,
    length_function=len,
    is_separator_regex=False
)
# `splitted_docs` is the list that the upload cell tags with metadata and indexes.
splitted_docs = text_splitter.split_documents(docs)

In [5]:
import torch
from langchain_community.embeddings import HuggingFaceBgeEmbeddings


def bge_embedding(model_name="BAAI/bge-m3", device=None):
    """Build the BGE embedding wrapper used for indexing and retrieval.

    Args:
        model_name: Hugging Face model id to load. Defaults to "BAAI/bge-m3",
            the value that used to be hard-coded.
        device: Device string handed to the model (e.g. "cuda" or "cpu").
            When None (the default) "cuda" is chosen if torch reports CUDA as
            available, otherwise "cpu" - the same rule as before.

    Returns:
        A HuggingFaceBgeEmbeddings instance created with
        ``encode_kwargs={"normalize_embeddings": True}``.
    """
    cuda_available = torch.cuda.is_available()
    if device is None:
        device = "cuda" if cuda_available else "cpu"

    # The status message reports CUDA availability, independent of an explicit
    # `device` override, so it stays truthful when a caller forces "cpu".
    print("CUDA is available!" if cuda_available else "CUDA is not available!")
    print("Loading BGE model...")

    return HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs={"device": device},
        encode_kwargs={"normalize_embeddings": True},
    )



# Instantiate the embedding model once; the upload cell and the retriever both reuse `embeddings`.
embeddings = bge_embedding()




CUDA is not available!
Loading BGE model...


  from .autonotebook import tqdm as notebook_tqdm


In [12]:
from langchain_qdrant import Qdrant
from qdrant_client import QdrantClient, models
import uuid

collection_name = "analytix_camp"
url = "http://localhost:6333"

# Keep the id as a plain string: every filter in this notebook matches
# `metadata.file_uid` against a string value, and a str payload does not depend
# on how the client happens to serialize uuid.UUID objects.
# Note: a fresh id is generated on every run, so re-running this cell uploads
# the same chunks again under a new file_uid.
file_uuid = str(uuid.uuid4())

documents_with_payload = []

for chunk in splitted_docs:
    # Metadata is attached in place; these keys are what the delete and
    # retriever cells filter on (as `metadata.<key>`).
    chunk.metadata["group_id"] = "Hasnain Ali Poonja"
    chunk.metadata["file_uid"] = file_uuid
    chunk.metadata["num_of_pages"] = len(docs)

    documents_with_payload.append(chunk)

# Embed the tagged chunks and write them to the collection at `url`.
vector_store = Qdrant.from_documents(
    documents_with_payload,
    embeddings,
    collection_name=collection_name,
    url=url,
)


In [11]:
from qdrant_client import QdrantClient, models

# Delete every point of one uploaded file from the collection.
# NOTE(review): the file_uid below is hard-coded and this call is destructive;
# confirm it refers to the upload you actually want to remove before running.
url = "http://localhost:6333"
qdrant_client = QdrantClient(url=url)

# Both conditions are listed under `must`: a point is selected only when its
# metadata carries this file_uid and this group_id.
qdrant_client.delete(
    collection_name="analytix_camp",
    points_selector=models.FilterSelector(
        filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="metadata.file_uid",
                    match=models.MatchValue(value="d03e344a-6bba-4146-b0fc-dc17cf541179"),
                ),
                models.FieldCondition(
                    key="metadata.group_id",
                    match=models.MatchValue(value="Hasnain Ali Poonja")
                )
            ],
        )
    ),
)

UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

In [None]:
from langchain_openai import ChatOpenAI

# Chat model used by the prompt chains defined in the following cells (temperature fixed at 0).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [15]:
from typing import List, Optional
import os

def build_file_conditions(file_uuid_array):
    """Return one `metadata.file_uid` match condition per id in `file_uuid_array`.

    The result is a list of `models.FieldCondition` objects in the same order
    as the input ids; an empty input yields an empty list.
    """
    return [
        models.FieldCondition(
            key="metadata.file_uid",
            match=models.MatchValue(value=uid),
        )
        for uid in file_uuid_array
    ]

def create_vector_store_retriever(
    file_uuid_array,
    embeddings,
    collection_name,
    url="http://localhost:6333",
    group_id="Hasnain Ali Poonja",
    k=5,
    score_threshold=0.4,
):
    """Create a score-thresholded retriever over a Qdrant collection.

    Args:
        file_uuid_array: File ids to restrict the search to. When it is None,
            empty, or its first element is the empty string, the search falls
            back to filtering on `group_id` instead.
        embeddings: Embedding model handed to the vector store.
        collection_name: Name of the Qdrant collection to search.
        url: Qdrant endpoint. Defaults to the previously hard-coded local URL.
        group_id: Value matched against `metadata.group_id` in the fallback
            filter. Defaults to the previously hard-coded group.
        k: Value passed as "k" in the search kwargs (default 5).
        score_threshold: Value passed as "score_threshold" (default 0.4).

    Returns:
        The retriever produced by `vector_store.as_retriever` with
        search_type "similarity_score_threshold".
    """
    qdrant_client = QdrantClient(url=url)

    # NOTE(review): the saved notebook output echoes this constructor call,
    # which looks like a warning (possibly a deprecation) - confirm.
    vector_store = Qdrant(
        client=qdrant_client,
        collection_name=collection_name,
        embeddings=embeddings,
    )

    # Check for None / [] before touching element 0; the original indexed
    # unconditionally and crashed on an empty selection.
    has_file_ids = bool(file_uuid_array) and file_uuid_array[0] != ""

    if has_file_ids:
        # Conditions go under `should`, one per requested file id.
        search_filter = models.Filter(should=build_file_conditions(file_uuid_array))
    else:
        search_filter = models.Filter(
            must=[
                models.FieldCondition(
                    key="metadata.group_id",
                    match=models.MatchValue(value=group_id),
                )
            ]
        )

    search_args = {"filter": search_filter, "k": k, "score_threshold": score_threshold}
    return vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs=search_args,
    )



# Build a retriever restricted to a single uploaded file.
# NOTE(review): the file_uid is hard-coded; it has to match a `metadata.file_uid`
# that is actually stored in the collection - confirm before re-running.
retriever = create_vector_store_retriever(["1d4ae57f-d8fa-464c-8c2a-0eac67d340bd"], embeddings, "analytix_camp")




  vector_store = Qdrant(


In [16]:
# Retrieval smoke test; the query text appears to be a bibliography entry copied from the PDF.
documents_retrieved =  retriever.invoke("Zihao Wang, Shaofei Cai, Anji Liu, Xiaojian Ma, and Yitao Liang. 2023. Describe, explain, plan and se- lect: Interactive planning with large language models enables open-world multi-task agents.")

In [17]:
# Display the retrieved documents together with their metadata.
documents_retrieved

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-05-29T00:17:24+00:00', 'author': '', 'keywords': '', 'moddate': '2023-05-29T00:17:24+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'Docs/plan_and_solve.pdf', 'total_pages': 24, 'page': 10, 'page_label': '11', 'group_id': 'Hasnain Ali Poonja', 'file_uid': '1d4ae57f-d8fa-464c-8c2a-0eac67d340bd', 'num_of_pages': 24, '_id': '8106b87c-b9b9-47a7-8ecd-6aadb8f20a9a', '_collection_name': 'analytix_camp'}, page_content='Zihao Wang, Shaofei Cai, Anji Liu, Xiaojian Ma, and\nYitao Liang. 2023. Describe, explain, plan and se-\nlect: Interactive planning with large language models\nenables open-world multi-task agents. arXiv preprint\narXiv:2302.01560.\nJason Wei, Yi Tay, Rishi Bommasani, Colin Raffel,\nBarret Zoph, Sebastian Borgeaud, Dani Yogatama,\nMaarten Bosma, De

In [19]:
# Display the text of the first retrieved document only.
documents_retrieved[0].page_content

'Zihao Wang, Shaofei Cai, Anji Liu, Xiaojian Ma, and\nYitao Liang. 2023. Describe, explain, plan and se-\nlect: Interactive planning with large language models\nenables open-world multi-task agents. arXiv preprint\narXiv:2302.01560.\nJason Wei, Yi Tay, Rishi Bommasani, Colin Raffel,\nBarret Zoph, Sebastian Borgeaud, Dani Yogatama,\nMaarten Bosma, Denny Zhou, Donald Metzler, et al.\n2022a. Emergent abilities of large language models.\narXiv preprint arXiv:2206.07682.\nJason Wei, Xuezhi Wang, Dale Schuurmans, Maarten\nBosma, Ed Chi, Quoc Le, and Denny Zhou. 2022b.\nChain of thought prompting elicits reasoning in large\nlanguage models. In Thirty-sixth Conference on Neu-\nral Information Processing Systems (NeurIPS 2022).\nYixuan Weng, Minjun Zhu, Shizhu He, Kang Liu,\nand Jun Zhao. 2022. Large language models are\nreasoners with self-verification. arXiv preprint\narXiv:2212.09561.\nShunyu Yao, Dian Yu, Jeffrey Zhao, Izhak Shafran,\nThomas L. Griffiths, Yuan Cao, and Karthik\nNarasimhan. 

In [None]:
# Concatenate the text of every retrieved document, separated by a blank line,
# into the single context string that is passed to the QA prompt.
combined_context = "\n\n".join(
    [retrieved.page_content for retrieved in documents_retrieved]
)


In [23]:
from langchain.schema.output_parser import StrOutputParser
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction for the question-rewriting step. Written as adjacent
# literals; the resulting text is one line followed by a trailing newline.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is.\n"
)

# Message layout: system instruction, then the prior turns, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# prompt -> chat model -> plain string output
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

In [24]:
# Start with an empty history, i.e. a first-turn question.
chat_history = []
query = "What is the abstract of the paper?"

# Run only the question-rewriting chain to inspect what it produces for this query.
response = contextualize_q_chain.invoke({"chat_history": chat_history, "question": query})


In [25]:
# Display the rewritten question returned by the contextualize chain.
response

'Can you provide the abstract of the paper?'

In [26]:
# System prompt for the answering step; `{context}` is filled with the retrieved text.
# NOTE(review): the prompt text contains typos (e.g. "privided", "knowlege"). It is
# runtime text sent to the model, so it is left untouched here - fix deliberately if desired.
qa_system_prompt = """
You are a highly accurate and question-answering RAG agent. Your task is to provide answers based on the given context.

Instructions:
1. Answer the users QUESTION using the CONTEXT text privided.Keep your answer ground in the facts of the CONTEXT.
2. Determine if the context answers the question.
3. If the answer is not found in the CONTEXT, respond with your own knowlege that can be outside of the CONTEXT but in that case start by saying this "It seems that the information required to answer is not available in your documents, however, based on my knowledge, I can provide you with the following information:".


CONTEXT:
{context}
"""

# Message layout: system prompt (with context), prior turns, then the user's question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

# This version of the chain expects the caller to supply `context` and returns a plain string.
rag_chain = qa_prompt | llm | StrOutputParser()

In [28]:
# Answer the query using the manually assembled `combined_context` from the earlier retrieval.
rag_response = rag_chain.invoke({"chat_history": chat_history, "question": query, "context": combined_context})

In [29]:
# Display the generated answer.
rag_response

'The abstract of the paper is as follows:\n\n"Large language models (LLMs) have recently been shown to deliver impressive performance in various NLP tasks. To tackle multi-step reasoning tasks, few-shot chain-of-thought (CoT) prompting includes a few manually crafted step-by-step reasoning demonstrations which enable LLMs to explicitly generate reasoning steps and improve their reasoning task accuracy. To eliminate the manual effort, Zero-shot-CoT concatenates the target problem statement with “Let’s think step by step” as an input prompt to LLMs. Despite the success of Zero-shot-CoT, it still suffers from three pitfalls: calculation errors, missing-step errors, and semantic misunderstanding errors. To address the missing-step errors, we propose Plan-and-Solve (PS) Prompting. It consists of two components: first, devising a plan to divide the entire task into smaller subtasks, and then carrying out the subtasks according to the plan. To address the calculation errors and improve the qu

In [None]:
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Join the `page_content` of each document in `docs` with a blank line between them."""
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

# NOTE(review): everything in this span re-declares `llm`, the contextualize prompt/chain
# and the QA prompt with the same values used in earlier cells; the duplication keeps this
# cell's definitions in one place but means edits must be made in both spots.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# System instruction for rewriting a follow-up question into a standalone one.
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is.
"""

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)
# prompt -> chat model -> plain string output
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

# System prompt for the answering step; `{context}` is filled with the retrieved text.
# NOTE(review): contains typos ("privided", "knowlege"); left as-is because it is runtime text.
qa_system_prompt = """
You are a highly accurate and question-answering RAG agent. Your task is to provide answers based on the given context.

Instructions:
1. Answer the users QUESTION using the CONTEXT text privided.Keep your answer ground in the facts of the CONTEXT.
2. Determine if the context answers the question.
3. If the answer is not found in the CONTEXT, respond with your own knowlege that can be outside of the CONTEXT but in that case start by saying this "It seems that the information required to answer is not available in your documents, however, based on my knowledge, I can provide you with the following information:".


CONTEXT:
{context}
"""


qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

def contextualized_question(input: dict):
    """Pick what feeds the retriever for this turn.

    With no (or an empty) "chat_history" the raw "question" string is returned.
    Otherwise the question-rewriting chain object itself is returned;
    presumably the surrounding LangChain pipeline then runs it on the same
    input - confirm against the RunnableLambda documentation.
    """
    if not input.get("chat_history"):
        return input["question"]
    return contextualize_q_chain

# End-to-end RAG pipeline: add a `context` key to the incoming dict (computed by
# contextualized_question -> retriever -> format_docs), fill qa_prompt, call the llm.
# There is no StrOutputParser at the end of this chain; run_chain reads `.content`
# from each streamed item instead.
rag_chain = (
    RunnablePassthrough.assign(
        context=contextualized_question | retriever | format_docs
    )
    | qa_prompt
    | llm
)

In [43]:
async def run_chain(rag_chain, query, chat_history):
    """Stream the chain's answer piece by piece and record the turn afterwards.

    Yields the `.content` of every item produced by `rag_chain.astream`. Once
    the stream is exhausted, the question and the concatenated answer are
    appended to `chat_history` (mutated in place) and a status line is printed.
    """
    chain_input = {"question": query, "chat_history": chat_history}
    collected = ""

    async for chunk in rag_chain.astream(chain_input):
        text = chunk.content
        yield text
        collected += text

    # Only reached when the consumer drains the whole stream.
    chat_history.extend([HumanMessage(content=query), AIMessage(content=collected)])

    print("RAG chain executed successfully.")

In [44]:
async def test_streaming():
    """Stream one fixed question through `rag_chain` and echo each piece as it arrives."""
    question = "What is the abstract of the paper?"
    history = []

    # Pieces are printed back to back (no separator), flushed immediately.
    async for piece in run_chain(rag_chain, question, history):
        print(piece, end="", flush=True)

# Top-level await: this works as a notebook cell; a plain Python script would need asyncio.run(...).
await test_streaming()

It seems that the information required to answer is not available in your documents, however, based on my knowledge, I can provide you with the following information: The abstract of a paper typically summarizes the main objectives, methods, results, and conclusions of the research. If you need the specific abstract of the paper mentioned in the context, you would need to access the paper directly from a database or repository where it is published, such as arXiv.RAG chain executed successfully.
