In [8]:
import os
# The imports below expect the `src` package to be importable from the working
# directory, so step up one level to reach it. Guarded on the presence of `src`
# so that re-running this cell (or starting the kernel already at the project
# root) does not climb one directory further each time.
if not os.path.isdir("src"):
    os.chdir("../")

from src.chroma_store import initialize_vectorstore, load_documents_from_dir
from src.chunking_strategies import chunk_by_semantic, chunk_by_recursive_split
from src.rag_pipeline import create_rank_fusion_chain, generate_answer
from langchain_openai import ChatOpenAI
from src.utils import format_tuple_docs_to_text, format_docs_to_text

In [2]:
# Tunable inputs for this cell, named so they are easy to find and change.
DATA_DIR = "data/content"
CHUNK_SIZE = 800  # forwarded to the splitter as `chunk_size`

# Load the documents from the data directory.
documents = load_documents_from_dir(DATA_DIR)

# Split the loaded documents into chunks with the recursive splitter.
chunks = chunk_by_recursive_split(documents, chunk_size=CHUNK_SIZE)

# Build the vector store from the chunks.
# NOTE(review): the log output of this cell shows the existing Chroma database
# being cleared and recreated, so every run appears to re-embed the chunks.
vectorstore = initialize_vectorstore(chunks)

--INFO-- Loading documents from data/content


2024-07-10 05:41:55 - src.chroma_store - INFO - Clearing out the chroma database.
2024-07-10 05:41:55 - src.chroma_store - INFO - Creating a new chroma database.


--INFO-- Loaded 1 documents
Split 1 documents into 26 chunks.
Advisor shall be solely responsible for any income taxes or other assessments made or imposed by any governmental authority on Advisor with respect to the Services rendered and the compensation received hereunder, and any and all expenses and costs of himself, employees, agents and representatives, including, without limitation, any salary, overtime, severance or social benefits payable thereto, and marketing costs incurred in connection with the performance of obligations hereunder.

Confidentiality, Non-Competition and IP Ownership Undertaking: In connection with the performance of Advisor’s obligations under this Agreement, the Advisor shall execute a Confidentiality, Non-Competition and IP Ownership Undertaking in the form attached hereto as Exhibit A.
{'source': 'data/content/Robinson Advisory.docx', 'start_index': 5516}


2024-07-10 05:41:59 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2024-07-10 05:42:02 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [4]:
# Value passed to the retriever as `k` in its search kwargs.
similarity_count = 5
retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=similarity_count),
)

### Question

In [6]:
# Sample query used by the retrieval and answer-generation cells below.
question = "Who are the parties to the Agreement and what are their defined names?"


#### Test answer generation

In [5]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_community.llms import Cohere
from langchain_cohere import ChatCohere


# Cohere chat model.
# NOTE(review): nothing in this cell reads `llm`, and a later cell rebinds it
# to ChatOpenAI; it is kept here only so the notebook state stays unchanged.
llm = ChatCohere(
    model="command",
    temperature=0,
)

# Base similarity retriever. Reuses `similarity_count` from the earlier cell
# instead of repeating the literal 5, so the two retriever definitions cannot
# drift apart.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={
        "k": similarity_count,
    }
)

# Wrap the base retriever with the Cohere reranker as its compressor.
compressor = CohereRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

# Run the question through the compression retriever.
compressed_docs = compression_retriever.invoke(
    question
)
# Left disabled: `pretty_print_docs` is not imported in the cells shown here.
# pretty_print_docs(compressed_docs)

2024-07-08 12:40:03 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-08 12:40:04 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/rerank "HTTP/1.1 200 OK"


In [9]:
# Chat model used to generate the answer; the evaluation cell further down
# passes this same `llm`.
llm = ChatOpenAI(model="gpt-4o", temperature=0)


# Alternative path through `create_rank_fusion_chain`, currently disabled.
# retrieval_chain = create_rank_fusion_chain(question, llm, compression_retriever)
# Retrieve with the plain `retriever`, not `compression_retriever`.
docs = retriever.invoke(question)

# Presumably joins the retrieved documents into a single context string
# (helper lives in src.utils) — TODO confirm.
context_text = format_docs_to_text(docs)

# Generate the answer from the question and the retrieved context.
answer = generate_answer(question, context_text, llm=llm)

2024-07-10 05:44:16 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 429 Too Many Requests"
2024-07-10 05:44:16 - openai._base_client - INFO - Retrying request to /embeddings in 0.890523 seconds
2024-07-10 05:44:17 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-10 05:44:19 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [10]:
# Show the generated answer as this cell's output.
answer

'The parties to the Agreement are Cloud Investments Ltd. ("Company") and Jack Robinson ("Advisor").'

### Evaluation with Ragas

In [10]:
from src.ragas_eval import run_evaluation

# Run the evaluation over the Q&A file with the same retriever and LLM that
# were used for answer generation above.
results = run_evaluation(
    retriever=retriever,
    file_path="data/evaluation_sets/Robinson_Q&A.docx",
    llm=llm,
)

# Preview the first rows of the result.
results.head()

2024-07-08 12:43:51 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-08 12:43:53 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:43:53 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-08 12:43:55 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:43:55 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-08 12:43:59 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:43:59 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-07-08 12:44:01 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:44:01 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embedd

Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

2024-07-08 12:44:19 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:44:19 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:44:19 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:44:19 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:44:20 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:44:20 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:44:20 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:44:20 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-07-08 12:44:20 - httpx - INFO - HTTP Request: POST https://

Unnamed: 0,question,answer,contexts,ground_truth,answer_correctness,faithfulness,context_precision,context_recall
0,Who are the parties to the Agreement and what ...,The parties to the Agreement are Cloud Investm...,[Entire Agreement; No Waiver or Assignment: Th...,Cloud Investments Ltd. (“Company”) and Jack Ro...,0.733393,1.0,1.0,1.0
1,What is the termination notice?,Either party may terminate the Agreement at an...,[Term: The term of this Agreement shall commen...,According to section 4:14 days for convenience...,0.538494,1.0,0.805556,0.5
2,What are the payments to the Advisor under the...,The payments to the Advisor under the Agreemen...,[As full and sole consideration for the Servic...,According to section 6: 1. Fees of $9 per hour...,0.222408,1.0,0.7,1.0
3,Can the Agreement or any of its obligations be...,"The Agreement may not be assigned, sold, deleg...",[Entire Agreement; No Waiver or Assignment: Th...,1. Under section 1.1 the Advisor can’t assign ...,0.377274,0.833333,1.0,1.0
4,Who owns the IP?,The Company owns the IP. According to the IP s...,"[IP: Any Work Product, upon creation, shall be...",According to section 4 of the Undertaking (App...,0.722743,1.0,1.0,1.0


In [9]:
# Show the results table through the notebook's rich display rather than
# print(), which falls back to a wrapped plain-text dump of the frame.
results

                                            question  \
0  Who are the parties to the Agreement and what ...   
1                    What is the termination notice?   
2  What are the payments to the Advisor under the...   
3  Can the Agreement or any of its obligations be...   
4                                  Who owns the IP?    
5  Is there a non-compete obligation to the Advisor?   
6              Can the Advisor charge for meal time?   
7             In which street does the Advisor live?   
8       Is the Advisor entitled to social benefits?    
9  What happens if the Advisor claims compensatio...   

                                              answer  \
0  The parties to the Agreement are Cloud Investm...   
1  Either party may terminate the Agreement at an...   
2  The payments to the Advisor under the Agreemen...   
3  The Agreement may not be assigned, sold, deleg...   
4  The Company owns the IP. According to the IP s...   
5  Yes, during the term of engagement with the 