In [1]:
import os
# Move to the project root so `src` is importable and relative data paths
# resolve. Guarded so that re-running this cell does not climb one directory
# higher each time (a bare relative chdir is not idempotent).
if not os.path.isdir("src"):
    os.chdir("../")

from src import get_answer, load_documents_from_dir, save_to_chroma, get_vectorstore, split_text, get_retriever
from src.utils import format_docs_to_text

In [2]:
# Sample question about the escrow.
# NOTE(review): no later cell visible here reads `query` (the pipeline below
# uses `question`) — confirm whether it is still needed.
query = "What is the purpose of the escrow?"

def initialize_vectorstore(data_dir="data/content"):
    """Load, chunk, and index the documents found in a directory.

    Args:
        data_dir: Directory passed to ``load_documents_from_dir``. Defaults
            to "data/content", the path that used to be hard-coded, so
            existing zero-argument calls behave as before.

    Returns:
        The object returned by ``save_to_chroma`` for the chunked documents.
    """
    # Load -> split -> persist; each step feeds the next unchanged.
    documents = load_documents_from_dir(data_dir)
    chunks = split_text(documents)
    return save_to_chroma(chunks)
# Build the store used by the retriever below. The captured log output shows
# this clears and recreates the Chroma database and requests embeddings, so
# every run of this cell pays that cost again.
vectorstore = initialize_vectorstore()

2024-07-03 09:58:45 - src.chroma_store - INFO - Loading documents from data/content
2024-07-03 09:58:53 - src.chroma_store - INFO - Clearing out the chroma database.
2024-07-03 09:58:53 - src.chroma_store - INFO - Creating a new chroma database.


Split 1 documents into 1999 chunks.


2024-07-03 09:58:55 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2024-07-03 09:59:01 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-03 09:59:10 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [3]:
# Retrieval knobs: the score threshold and the result count ("k") that are
# handed to the retriever through its search kwargs.
similarity_threshold = 0.5
similarity_count = 5

retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=dict(score_threshold=similarity_threshold, k=similarity_count),
)

### Prompt

In [5]:
# Question fed to both the retrieval chain and the answer prompt. The stray
# leading space was removed so it no longer leaks into the embedding query
# and the rendered prompt.
question = "Whose consent is required for the assignment of the Agreement by the Buyer?"


In [16]:
from src.rag_pipeline import get_answer, create_rank_fusion_chain, get_unique_union
from langchain.prompts import ChatPromptTemplate
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnableMap
# from src.utils import format_tuple_docs_to_text



In [None]:
def format_tuple_docs_to_text(docs):
    """Join the page content of scored documents into one text block.

    Args:
        docs: Iterable of ``(doc, score)`` pairs. Only ``doc.page_content``
            is read; the score is ignored.

    Returns:
        The page contents joined by a ``---`` rule surrounded by blank lines.
    """
    separator = "\n\n---\n\n"
    contents = []
    for document, _score in docs:
        contents.append(document.page_content)
    return separator.join(contents)

In [27]:
# Chat model at temperature 0, passed to both the retrieval chain and the
# answer step.
llm = ChatOpenAI(temperature=0)

# Build the retrieval chain with the project helper and run it for the question.
# NOTE(review): the result is handed to format_tuple_docs_to_text, which
# unpacks (doc, score) pairs — confirm the chain returns that shape.
retrieval_chain = create_rank_fusion_chain(question, llm, retriever)
docs = retrieval_chain.invoke({"question": question})

# Flatten the retrieved documents into one context string for the prompt.
context_text = format_tuple_docs_to_text(docs)

def generate_answer(question, context, llm=None):
    """Answer a question with a chat model, grounded in the supplied context.

    Args:
        question: Question text substituted into the prompt.
        context: Retrieved text substituted into the prompt as grounding.
        llm: Chat model placed after the prompt in the chain. When omitted,
            a ``ChatOpenAI(temperature=0)`` instance is created; previously
            the ``None`` default was piped into the chain and failed while
            composing it.

    Returns:
        The model reply as produced by ``StrOutputParser``.
    """
    if llm is None:
        # Same model configuration the notebook uses elsewhere.
        llm = ChatOpenAI(temperature=0)

    # The template is sent to the model verbatim, indentation included.
    template = """Answer the following question based on this context:
    For questions starting with "Is", start "Yes" or "No" then proceed.

    {context}

    Question: {question}
    """

    prompt = ChatPromptTemplate.from_template(template)

    # Pick the two prompt variables out of the input mapping, render the
    # prompt, call the model, and reduce the reply to a plain string.
    final_rag_chain = (
        {"context": itemgetter("context"), "question": itemgetter("question")}
        | prompt
        | llm
        | StrOutputParser()
    )

    return final_rag_chain.invoke({"context": context, "question": question})



# generate_answer only takes (question, context, llm); the former
# `retriever=` keyword raised TypeError on a fresh run. Retrieval has already
# happened when context_text was built, so only the context is passed.
answer = generate_answer(question, context_text, llm=llm)

2024-07-03 10:44:23 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-03 10:44:24 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-03 10:44:24 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-03 10:44:24 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-03 10:44:24 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-03 10:44:24 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-03 10:44:25 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [28]:
# Show the generated answer as this cell's output.
answer

'The consent of the Sellers is required for the assignment of the Agreement by the Buyer.'