In [1]:
import os
from dotenv import load_dotenv

# Populate the process environment from a local .env file (if present) before
# any of the lookups below; each lookup yields None when the variable is unset.
load_dotenv()

# --- OpenAI (public API) ---
openai_api_key = os.environ.get('OPENAI-API-KEY')

# --- Azure OpenAI chat deployment ---
intranet_azure_openai_api_key = os.environ.get('INTRANET-AZURE-OPENAI-API-KEY')
intranet_azure_openai_endpoint = os.environ.get('INTRANET-AZURE-OPENAI-ENDPOINT')
intranet_azure_openai_api_version = os.environ.get('INTRANET-AZURE-OPENAI-API-VERSION')
intranet_azure_openai_deployment = os.environ.get('INTRANET-AZURE-OPENAI-DEPLOYMENT')
intranet_azure_openai_model_name = os.environ.get('INTRANET-AZURE-OPENAI-MODEL-NAME')

# --- Azure OpenAI embedding deployment ---
intranet_azure_openai_embedding_api_key = os.environ.get('INTRANET-AZURE-OPENAI-EMBEDDING-API-KEY')
intranet_azure_openai_embedding_endpoint = os.environ.get('INTRANET-AZURE-OPENAI-EMBEDDING-ENDPOINT')
intranet_azure_openai_embedding_api_version = os.environ.get('INTRANET-AZURE-OPENAI-EMBEDDING-API-VERSION')
intranet_azure_openai_embedding_deployment_name = os.environ.get('INTRANET-AZURE-OPENAI-EMBEDDING-DEPLOYMENT-NAME')

In [2]:
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings, OpenAIEmbeddings

# Chat model served from the Azure OpenAI deployment configured in the
# environment-loading cell.
azure_openai_chat_model = AzureChatOpenAI(
    azure_endpoint=intranet_azure_openai_endpoint,
    azure_deployment=intranet_azure_openai_deployment,
    api_version=intranet_azure_openai_api_version,
    api_key=intranet_azure_openai_api_key,
    model_name=intranet_azure_openai_model_name,
)

# Embedding model served from the (separately configured) Azure embedding deployment.
azure_openai_embeddings_model = AzureOpenAIEmbeddings(
    azure_endpoint=intranet_azure_openai_embedding_endpoint,
    azure_deployment=intranet_azure_openai_embedding_deployment_name,
    api_version=intranet_azure_openai_embedding_api_version,
    api_key=intranet_azure_openai_embedding_api_key,
)

# Embedding model backed by the public OpenAI API; only the key is configured here.
openai_embeddings_model = OpenAIEmbeddings(api_key=openai_api_key)

### Retriever

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
import os

# Collect every *.pdf path that sits directly inside the data directory.
pdf_directory = "data/"
pdf_files = [
    os.path.join(pdf_directory, file)
    for file in os.listdir(pdf_directory)
    if file.endswith(".pdf")
]

# `docs` keeps one list of loaded documents per PDF; `docs_list` is the same
# content flattened into a single list, in the same order.
docs = []
docs_list = []
for pdf in pdf_files:
    loaded = PyMuPDFLoader(pdf).load()
    docs.append(loaded)
    docs_list.extend(loaded)

# Token-based chunking: 500-token chunks with a 50-token overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=50,
)
doc_splits = text_splitter.split_documents(docs_list)

In [4]:
from langchain_chroma import Chroma
from uuid import uuid4

from uuid import NAMESPACE_URL, uuid5

# Persistent Chroma collection: data written here survives kernel restarts
# because of `persist_directory`.
vector_store = Chroma(
    collection_name="netflix",
    embedding_function=openai_embeddings_model,
    persist_directory="netflix",
)

# Derive a deterministic ID for every chunk from its source and its position
# within that source. With random IDs, every re-run of this cell appended a
# second (third, ...) copy of each chunk to the persisted collection, so the
# retriever returned duplicates. Stable IDs make re-adding overwrite instead.
# NOTE(review): assumes the loader stores the file path under metadata["source"];
# chunks without it are grouped under the empty source — confirm against the loader.
# NOTE(review): if a PDF later yields fewer chunks, the trailing old chunks are
# not removed by this cell.
_chunks_seen_per_source = {}
uuids = []
for _split in doc_splits:
    _source = str(_split.metadata.get("source", ""))
    _position = _chunks_seen_per_source.get(_source, 0)
    _chunks_seen_per_source[_source] = _position + 1
    uuids.append(str(uuid5(NAMESPACE_URL, f"{_source}#{_position}")))

# Skip the write entirely when there is nothing to index (e.g. empty data dir).
if doc_splits:
    vector_store.add_documents(documents=doc_splits, ids=uuids)

# Plain similarity search returning the 5 closest chunks.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})

### Reflection

In [5]:
from pydantic import BaseModel, Field

# Tool schema for the "answer now" path. It is passed to `bind_tools`, so the
# class docstring and the Field descriptions below are prompt text seen by the
# model — edit them as prompts, not as ordinary documentation.
class Generator(BaseModel):
    """Tool used when the retrieved documents provide sufficient information to directly answer the human query.
    The query field should contain the original question provided by the user.
    The generation field will contain the answer to user generated by the model based on the retrieved documents if the documents are related to the query, else you will answer to the user why the query cannot be answered.
    Please note that generation should keep internal state anonymous and not contain any personal information. Such as 'According to our knowledge,...'"""
    # The user's original question, echoed back by the model.
    query: str = Field(
        description="The human question that can be directly answered by the retrieved documents.",
    )
    # The final answer text (or the explanation of why no answer is possible).
    generation: str = Field(
        description="The answer generation of the query based on the documents. If there are no documents related or no documents returned, the generation will be why the query cannot be answered.",
    )

# Tool schema for the "need more information" path. It is passed to
# `bind_tools`, so the class docstring and the Field descriptions below are
# prompt text seen by the model — edit them as prompts, not as ordinary
# documentation.
class Inspector(BaseModel):
    """Tool used when the retrieved documents do not provide enough information or wrong information to answer the human query.
    This tool helps to generate a new query based on related documents and feedback to retrieve more relevant information.
    The query field should contain the expanded question.
    The documents field will contain the list of documents that are related to the query and used to generate the new query.
    The feedback field will contain the feedback on your own to evaluate if the new query is relevant to the original question and related docs for future invocation."""
    # The rewritten / expanded query proposed by the model.
    query: str = Field(
        description="The new expanded query generated from the related documents. The query will be used to search for more relevant documents to get more information for that query.",
    )
    # The subset of supplied documents the model considers related, as plain strings.
    documents: list[str] = Field(
        description="The list of document that are retrieved in closest relation to user query and used as foundation to generate the new query.",
    )
    # The model's self-critique, fed back into the next prompt as {feedback}.
    feedback: str = Field(
        description="Your feedback to evaluation if the new query is relevant to the original question, what needs to be consider. Should be written from your POV, such as 'I think this should be...', 'I should look into...', 'The documents are correct but I think,...'",
    )

# Reflection model: must call exactly one of the two tools on every turn.
llm_with_tools = azure_openai_chat_model.bind_tools([Generator, Inspector], tool_choice="any")

# Fallback model used once the expansion budget is spent. The tool call is
# forced here as well: without `tool_choice` the model is free to reply with
# plain text, leaving `response.tool_calls` empty for the code that reads it.
llm_with_generation_only_tool = azure_openai_chat_model.bind_tools([Generator], tool_choice="any")

In [6]:
# System prompt for the reflection step. It tells the model to judge the
# retrieved documents and then call either the Generator tool (answer) or the
# Inspector tool (propose an expanded query plus self-feedback).
# This is runtime prompt text: it contains no format placeholders and is sent
# verbatim, so any wording change alters model behaviour.
system_prompt = """\
<role>
    - You are a Netflix supervisor that reflecting customer question by feedback and criticise user query and retrieved documents.
    - You will be provided the documents that have been retrieved from the search function of Netflix Database to answer the human question.
    - Your task is to evaluate the documents if the information enough and correct to answer the human question or we need to search for more relevant documents.
    - If the documents are enough, use the Generator tool to generate the answer based on the documents for user, do not make up the answer.
    - If the documents are not enough, use the Inspector tool to generate the expanded new query based on the related documents or such related to Netflix documents and feedback (if applicable) to retrieve more relevant information.
    - If there are no documents returned, feedback to yourself that there are no documents to answer the human question and use the Inspector tool to generate the new query closely related to the original question.
</role>

<tools>
    - Generator: use this tool in case the documents are enough to answer the human question, just input the original question as the query and answer generated from the documents, explain to user why the query cannot be answered if there are something wrong in retrieval process.
    - Inspector: use this tool in case the documents are not enough to answer the human question, generate the new expanded query based on the related documents or query, domain should be related to Netflix or Netflix documents in database and feedback to retrieve more relevant information.
</tools>

<instruction>
    - Evaluate the documents whether the information enough or not, more than a single document is sufficient enough to answer the human question. If not, we need to search for more relevant documents.
    - Choose the tool that you want to use to continue the next step.
    - If you use Inspector tool, the new query is generated only from the related documents if there are any else try to reassmble the query based on the original query, and feedback from your insights.
    - Tools and feedback should be used to evaluate the information and generate the answer privately, user should not know details about these.
</instruction>

<feedback>
    - Give out feedback to yourself whether you think the documents are enough to answer the human question or not, from your point of view.
    - If you think the documents are not enough, feedback to yourself what needs to be consider to generate the new query.
    - If you think the documents are enough, feedback to yourself that the documents are enough to answer the human question.
    - Do not invent additional information; if you are uncertain about something, indicate that you do not know.
</feedback>
"""

# Human-turn template for the reflection step. It is filled with `str.format`
# and expects exactly three placeholders:
#   {query}     - the current (original or expanded) question
#   {documents} - the documents to evaluate, rendered with their default str()
#   {feedback}  - the model's previous self-feedback ("" on the first turn)
# This is runtime prompt text; any literal braces added later must be doubled.
human_prompt = """\
Please answer the following question based on the documents provided by Netflix Database:
{query}

Here are the documents that have been retrieved from database of Netflix should be related to the user question:
{documents}

Here are your latest feedback to yourself, if there is an empty feedback, you should ignore it:
{feedback}

If you need more information, please let me know. Furthermore, if you think the documents are not enough to answer the question, please provide feedback on what needs to be consider to generate the new query.
"""

In [7]:
# Sample question, and the text of the chunks the retriever returns for it.
query = "Does UK Tax Strategy can be applied in Italy or all of Europe in general?"
documents = list(map(lambda hit: hit.page_content, retriever.invoke(query)))

In [8]:
from langchain_core.messages import SystemMessage, HumanMessage
from loguru import logger

# Reflection loop after retrieval
def process_query_with_expansions(query, documents, max_expansions=3, feedback="", retriever=None):
    """Run the reflection loop and return the model response that carries the answer.

    On each round the reflection model (``llm_with_tools``) is shown the current
    query, documents and feedback and must call one tool:

    * ``Generator`` - the documents suffice; that response is returned as-is.
    * ``Inspector`` - the documents do not suffice; its ``query``, ``feedback``
      and ``documents`` arguments become the inputs of the next round.

    If no answer was produced after ``max_expansions`` rounds (or the model
    returned no tool call / an unknown tool), the generation-only model
    (``llm_with_generation_only_tool``) is invoked once with the latest state
    and its response is returned.

    Args:
        query: The user's question.
        documents: Document texts already retrieved for ``query``.
        max_expansions: Maximum number of reflection rounds (LLM calls) before
            falling back to the generation-only model. ``0`` skips reflection.
        feedback: Initial self-feedback to seed the first prompt with.
        retriever: Optional object exposing ``invoke(query)`` that returns items
            with a ``page_content`` attribute. When given, every expanded query
            is actually searched and the hits are appended to the documents the
            model marked as related. When ``None`` (default), only the
            documents echoed back by the model are carried forward.

    Returns:
        The response object from whichever model call ended the loop; the
        answer, when present, is in ``response.tool_calls``.
    """
    current_query = query
    current_feedback = feedback

    # A bounded `for` guarantees termination even when the model keeps asking
    # for expansions or returns something unexpected.
    for _ in range(max_expansions):
        response = llm_with_tools.invoke(
            _reflection_messages(current_query, documents, current_feedback)
        )
        tool_calls = response.tool_calls
        logger.info(tool_calls)

        if not tool_calls:
            # Nothing to dispatch on; let the forced-generation fallback answer.
            break

        tool_call = tool_calls[0]
        tool_name = tool_call.get("name") or ""
        tool_args = tool_call.get("args") or {}

        # Tool names are the class names of the schemas passed to bind_tools.
        if tool_name == "Generator":
            return response
        if tool_name != "Inspector":
            logger.warning("Unexpected tool call {!r}; falling back to generation.", tool_name)
            break

        # Keep the previous value when the model omitted a field, so the next
        # prompt never contains a literal "None".
        current_query = tool_args.get("query") or current_query
        current_feedback = tool_args.get("feedback") or current_feedback
        next_documents = list(tool_args.get("documents") or [])

        if retriever is not None:
            # Search with the expanded query and append unseen hits, preserving order.
            for hit in retriever.invoke(current_query):
                if hit.page_content not in next_documents:
                    next_documents.append(hit.page_content)

        documents = next_documents

    # Expansion budget exhausted (or loop aborted): force a final answer.
    response = llm_with_generation_only_tool.invoke(
        _reflection_messages(current_query, documents, current_feedback)
    )
    logger.info(response.tool_calls)

    return response


def _reflection_messages(query, documents, feedback):
    """Build the system + human message pair for one reflection/generation call."""
    return [
        SystemMessage(content=system_prompt),
        HumanMessage(
            content=human_prompt.format(
                query=query,
                documents=documents,
                feedback=feedback,
            )
        ),
    ]

In [None]:
# Run the reflection loop for the sample question with the default settings.
response = process_query_with_expansions(query=query, documents=documents)

In [None]:
# Show the tool call(s) carried by the response that ended the reflection loop.
print(response.tool_calls)

### Evaluation

In [11]:
# system_prompt = """You are an AI assistant that working in QA system to answer the human question. Based on the query and the docs, answer the human question with the best answer you can generate.\n\nHere are the documents that have been retrieved:\n{documents}"""
# human_prompt = "The original question is: {query}\n\nPlease provide the best answer based on your internal documents."

In [12]:
# query = "Does UK Tax Strategy can be applied in Italy?"
# documents = retriever.invoke(query)
# docs = [doc.page_content for doc in documents]

In [13]:
# response = await azure_openai_chat_model.ainvoke(
#     [
#         SystemMessage(
#             content=system_prompt.format(
#                 documents=documents,
#             )
#         ),
#         HumanMessage(
#             content=human_prompt.format(
#                 query=query,
#             )
#         )
#     ]
# )

In [None]:
# print(response.content)

### Features:
- No hallucination / Stick to ground truth
- Spelling
- TODOS: memory?
- Bias