# Query Decomposition

In [None]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [None]:
# Schema for a single decomposed sub-question. It is passed to
# `llm.bind_functions([SubQuery])` in this notebook.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as the function/parameter descriptions, so treat
# them as prompt text rather than as ordinary documentation -- confirm.
class SubQuery(BaseModel):
    """Search over documents about bank institutions."""

    # Required string field (`...` marks it as having no default).
    sub_query: str = Field(
        ...,
        description="A very specific query against documents.",
    )

In [None]:
# System prompt for the decomposition step. The text is sent to the model as
# written, so any edit to it changes model behaviour.
system =\
"""
You are an expert that generates multiple sub-questions.
You have access to documents about bank institutions,
with each document referencing a single bank institution.

Perform query decomposition. Given a user question, break it down into distinct sub-questions per
bank organization that you need to answer in order to respond to the original user question.
Generate as many questions as the number of distinct bank entities you encounter
in the original question.

If there are acronyms or words you are not familiar with, do not try to rephrase them.
"""

# Two-message chat prompt: the fixed system instructions plus the user's
# question, filled in through the `question` input variable.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)


def _split_sub_questions(text):
    """Return the model reply as a list with one sub-question per line.

    Each line is stripped, and blank or whitespace-only lines are dropped so
    they are never treated as sub-questions. A reply with no usable lines
    yields ``[""]``, which is exactly what ``"".split("\n")`` returns, so
    code that tests for that value keeps working.
    """
    stripped_lines = (line.strip() for line in text.split("\n"))
    return [line for line in stripped_lines if line] or [""]


# NOTE(review): `AzureChatOpenAI` is not imported in any visible cell; make
# sure it is imported from the LangChain OpenAI integration in use before
# running this cell, otherwise this line raises NameError.
llm = AzureChatOpenAI(deployment_name="gpt-4", temperature=0.5)

# NOTE(review): the model is offered `SubQuery` as a function, but the chain
# only reads the message text via StrOutputParser. If the model answers with
# a function call instead of text, the text is presumably empty and the
# call's arguments are ignored -- confirm this is intended.
llm_with_tools = llm.bind_functions([SubQuery])

# prompt -> model -> message text -> list of sub-questions
query_analyzer = prompt | llm_with_tools | StrOutputParser() | _split_sub_questions

In [None]:
# Multi-bank question used to exercise the decomposition chain.
original_question = "How many GRI requirements have been implemented by the Bank A, Bank B, and Bank C?"

# The chain's prompt expects a single input variable named "question".
sub_questions = query_analyzer.invoke({"question": original_question})

In [None]:
# Show the list produced by the decomposition chain.
print(sub_questions)

# Example output for the three-bank question:
# [
#     'How many GRI requirements have been implemented by Bank A?',
#     'How many GRI requirements have been implemented by Bank B?',
#     'How many GRI requirements have been implemented by Bank C?',
# ]

In [None]:
# The chain splits the model reply on newlines, so the list may contain empty
# or whitespace-only entries (or consist of nothing else). Drop those first so
# a blank line is never sent to the RAG agent as a question.
sub_questions = [candidate.strip() for candidate in sub_questions if candidate.strip()]

if not sub_questions:
    # Nothing usable came back: answer the original question directly.
    print("No sub-questions were generated.")
    sub_questions = [original_question]
else:
    print(f"Original question was decomposed into {len(sub_questions)} sub-questions.")

# RAG

In [None]:
from common.utils import create_docsearch_agent

In [None]:
# Build the agent that answers individual sub-questions; later cells call it
# as `rag_agent(sub_question)`.
rag_agent = create_docsearch_agent() # Dummy RAG Agent

In [None]:
# Answers collected in the same order as `sub_questions`, one per question.
rag_results = []

for i, sub_question in enumerate(sub_questions, start=1):
    print(f"Sub-question {i}: {sub_question}")

    # Placeholder that survives only if every attempt below fails.
    sub_response = "No response found."

    # Best-effort retry: up to two attempts per sub-question.
    for k in range(2):
        print(f"Attempt {k + 1}")
        try:
            sub_response = rag_agent(sub_question)
            break
        except Exception as e:
            # Deliberately broad: any agent failure is reported and retried
            # rather than aborting the whole run.
            print(f"Error: {e}")

    # Must run once per sub-question (inside the outer loop); otherwise only
    # the last answer is stored and answers no longer line up with questions.
    rag_results.append(sub_response)

In [None]:
def format_qa_pairs(questions, answers):
    """Render question/answer pairs as a numbered plain-text block.

    Pairs are numbered from 1 and separated by a blank line. Pairing uses
    ``zip``, so surplus items in the longer sequence are ignored. Leading and
    trailing whitespace of the whole result is stripped; an empty input
    produces an empty string.
    """
    numbered_pairs = enumerate(zip(questions, answers), start=1)
    blocks = [
        f"Question {idx}: {q}\nAnswer {idx}: {a}\n\n"
        for idx, (q, a) in numbered_pairs
    ]
    return "".join(blocks).strip()

In [None]:
# Combine each sub-question with its RAG answer into one text block; this
# becomes the `context` variable of the final prompt.
context = format_qa_pairs(sub_questions, rag_results)

In [None]:
# Prompt text for the synthesis step. It takes two variables: `context` (the
# formatted Q+A pairs) and `question` (the original user question).
template = """
Here is a set of Q+A pairs:

{context}

Use this information to answer the original question: {question}
"""

# Note: this rebinds the name `prompt`, which an earlier cell used for the
# decomposition prompt.
prompt = ChatPromptTemplate.from_template(template)

# prompt -> model -> plain string answer
final_rag_chain = prompt | llm | StrOutputParser()

final_response = final_rag_chain.invoke(
    {"question": original_question, "context": context}
)

In [None]:
# Display the answer synthesised from the sub-question Q+A pairs.
print(final_response)