### Load Your Documents

In [6]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader(
   "/home/babi/Desktop/10academy/week11/Optimized-Contract-QA-RAG-System-Enhancements/data/Evaluation Sets/Raptor Contract.txt"
)

documents = loader.load()

### Instantiate Embedding Model

In [7]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002"
)

### Instantiate LLM

In [8]:
from langchain_openai import OpenAI
import os
# from google.colab import userdata
# os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
openai_llm = OpenAI(temperature=0)

### Document Splitter

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1250,
    chunk_overlap = 100,
    length_function = len,
    is_separator_regex = False
)
#
split_docs = text_splitter.split_documents(documents)
print(len(split_docs))

259


### Instantiate the Vectorstore

In [10]:
from langchain_community.vectorstores import Chroma
vectorstore = Chroma(embedding_function=embeddings,
                     persist_directory="Vectorstore/chromadb",
                     collection_name="full_documents")

### Load and persist the split documents into the vectorstore

In [11]:
vectorstore.add_documents(split_docs)
vectorstore.persist()

### Instantiate the Keyword / Sparse embeddings model

In [12]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.retrievers import ContextualCompressionRetriever
#
bm25_retriever = BM25Retriever.from_documents(split_docs)
bm25_retriever.k=10

### Instantiate Reranker — Cross Encoders

In [13]:
from __future__ import annotations
from typing import Dict, Optional, Sequence
from langchain.schema import Document
from langchain.pydantic_v1 import Extra, root_validator

from langchain.callbacks.manager import Callbacks
from langchain.retrievers.document_compressors.base import BaseDocumentCompressor

from sentence_transformers import CrossEncoder
# from config import bge_reranker_large

class BgeRerank(BaseDocumentCompressor):
    #  BAAI/bge-reranker-large
    model_name:str = 'BAAI/bge-small-en-v1.5'
    """Model name to use for reranking."""
    top_n: int = 3
    """Number of documents to return."""
    model:CrossEncoder = CrossEncoder(model_name)
    """CrossEncoder instance to use for reranking."""

    def bge_rerank(self,query,docs):
        model_inputs =  [[query, doc] for doc in docs]
        scores = self.model.predict(model_inputs)
        results = sorted(enumerate(scores), key=lambda x: x[1], reverse=True)
        return results[:self.top_n]


    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """
        Compress documents using BAAI/bge-reranker models.

        Args:
            documents: A sequence of documents to compress.
            query: The query to use for compressing the documents.
            callbacks: Callbacks to run during the compression process.

        Returns:
            A sequence of compressed documents.
        """
        if len(documents) == 0:  # to avoid empty api call
            return []
        doc_list = list(documents)
        _docs = [d.page_content for d in doc_list]
        results = self.bge_rerank(query, _docs)
        final_results = []
        for r in results:
            doc = doc_list[r[0]]
            doc.metadata["relevance_score"] = r[1]
            final_results.append(doc)
        return final_results

  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at BAAI/bge-small-en-v1.5 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return torch._C._cuda_getDeviceCount() > 0


### Instantiate a Contextual Compression Pipeline

In [14]:
from langchain_community.document_transformers.embeddings_redundant_filter import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_transformers.long_context_reorder import LongContextReorder
from langchain.retrievers.multi_query import MultiQueryRetriever
#
vs_retriever = vectorstore.as_retriever(search_kwargs={"k":10})
#

ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever,vs_retriever],
                                       weight=[0.5,0.5])
#

redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
#
reordering = LongContextReorder()
#
reranker = BgeRerank()
#
pipeline_compressor = DocumentCompressorPipeline(transformers=[redundant_filter,reordering,reranker])
#
compression_pipeline = ContextualCompressionRetriever(base_compressor=pipeline_compressor,
                                                      base_retriever=ensemble_retriever)

### Helper function to display retrieved documents

In [15]:
def pretty_print_docs(docs):
  print(
      f"\n{'-' * 100}\n".join([f"Document {i+1}:\n\n + {d.page_content}" for i,d in enumerate(docs)])
  )

In [16]:
pretty_print_docs(vs_retriever.get_relevant_documents("What are the major changes in v 0.1.0?"))

Document 1:

 + [Reserved]




[Reserved]
----------------------------------------------------------------------------------------------------
Document 2:

 + made, changed or revoked any material Tax election; elected or changed any method of accounting for Tax purposes; settled any Action in respect of Taxes; or entered into any Contractual Obligation in respect of Taxes with any Governmental Authority;
opened any Facility or entered into any new line of business or closed any Facility or discontinued any line of business or any material business operations;
entered into, adopted, terminated, modified, or amended in material respect (including by accelerating material rights or benefits under) any Material Company Contracts;
wrote up or wrote down any of its material Assets or revalue its inventory;
opened any new bank or deposit accounts (or materially change any existing arrangements with respect to any existing bank or deposit accounts) or granted any new powers of attorney;
-----

In [17]:
docs = compression_pipeline.get_relevant_documents("What are the major changes in v 0.1.0?")
pretty_print_docs(docs)

Document 1:

 + Proposed Final Closing Statement.  Within sixty (60) calendar days after the Closing Date, the Company shall prepare or cause to be prepared, and will provide to the Sellers’ Representative, a written statement setting forth in reasonable detail its proposed final determination of the Closing Debt Amount, Closing Cash Amount, and the Seller Transaction Expenses (the “Proposed Final Closing Statement”).  The Proposed Final Closing Statement will be prepared in accordance with the Accounting Principles and without giving effect to any changes resulting from the consummation of the Contemplated Transactions on the Closing Date.  The Sellers’ Representative and its Representatives shall have reasonable access to the work papers and other books and records of the Acquired Companies and to the persons who prepared the Proposed Final Closing Statement, for purposes of assisting the Sellers’ Representative and its Representatives in their review of the Proposed Final Closing St

### Define a naive RAG


In [26]:
from langchain.chains import RetrievalQA
#
qa = RetrievalQA.from_chain_type(llm=openai_llm,
                                 chain_type="stuff",
                                 retriever=vectorstore.as_retriever(search_kwargs={"k":5}),
                                 return_source_documents=True)

naive_response = qa("Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties?")
naive_response["result"]

" The Sellers are responsible for a breach of representations and warranties to the extent of their Pro Rata Percentage, unless the breach was caused by the Sellers' Representative's gross negligence, bad faith, or willful misconduct."

### Define an Advanced RAG

In [27]:
from langchain.chains import RetrievalQA
#
qa_advanced = RetrievalQA.from_chain_type(llm=openai_llm,
                                 chain_type="stuff",
                                 retriever=compression_pipeline,
                                 return_source_documents=True)
#
qa_adv_response = qa_advanced("Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties?")  
qa_adv_response["result"]

" The Sellers are responsible for a breach of representations and warranties if it is committed by any of their Affiliates or their or their Affiliates' Representatives. However, the Sellers' liability is limited to the obligations outlined in the agreement and the Buyer cannot seek recourse from the personal assets of the Sellers' Representative."

## Evaluating Naive RAG and Advanced RAG using RAGAS evaluation Framework

### Synthetic Test Set Generation

In [29]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
#
#load documents again to avoid any kind of bias
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 200
)
documents = text_splitter.split_documents(documents)
len(documents)
#
#
generator = TestsetGenerator.with_openai()
#
testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})


Generating:  50%|█████     | 5/10 [00:17<00:17,  3.43s/it]         
Exception in thread Thread-9:
Traceback (most recent call last):
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/site-packages/ragas/executor.py", line 75, in run
    results = self.loop.run_until_complete(self._aresults())
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete
    return future.result()
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/site-packages/ragas/executor.py", line 63, in _aresults
    raise e
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/site-packages/ragas/executor.py", line 58, in _aresults
    r = await future
  File "/home/babi/miniconda3/envs/week10/lib/python3.8/asyncio/tasks.py", line 608, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "/home/babi/miniconda3/envs/

ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exception=False` incase you want to show only a warning message instead.

In [None]:
testset.test_data[0]

### Generating Responses with RAG Pipeline

In [None]:
test_df = testset.to_pandas()
test_questions = test_df["question"].values.tolist()
test_groundtruths = test_df["ground_truth"].values.tolist()
test_df.head()

#### Generate responses using our Naive RAG pipeline using the questions we’ve generated.

In [None]:
answers = []
contexts = []

for question in test_questions:
  response = qa.invoke({"query" : question})
  answers.append(response["result"])
  contexts.append([context.page_content for context in response['source_documents']])

#### Wrap the information in a Hugging Face dataset for use in the Ragas library.

In [None]:
from datasets import Dataset

response_dataset = Dataset.from_dict({
    "question" : test_questions,
    "answer" : answers,
    "contexts" : contexts,
    "ground_truth" : test_groundtruths
})
response_dataset[0]

#### Evaluating with RAGAS

In [None]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
]
#
naive_results = evaluate(response_dataset, metrics,raise_exceptions=False)
naive_results


#### Generate responses using our Advanced RAG pipeline using the questions we’ve generated.

In [None]:
adv_answers = []
adv_contexts = []

for question in test_questions:
  response = qa_advanced.invoke({"query" : question})
  adv_answers.append(response["result"])
  adv_contexts.append([context.page_content for context in response['source_documents']])

#wrap into huggingface dataset
response_dataset_advanced_retrieval = Dataset.from_dict({
    "question" : test_questions,
    "answer" : adv_answers,
    "contexts" : adv_contexts,
    "ground_truth" : test_groundtruths
})
response_dataset_advanced_retrieval[0]

In [None]:
advanced_retrieval_results = evaluate(response_dataset_advanced_retrieval, metrics,raise_exceptions=False)
advanced_retrieval_results


#### Comapare the evaluations


In [None]:
import pandas as pd

df_original = pd.DataFrame(list(naive_results.items()), columns=['Metric', 'Baseline'])
df_comparison = pd.DataFrame(list(advanced_retrieval_results.items()), columns=['Metric', 'Contextual Compresssion with Document Stuffing'])

df_merged = pd.merge(df_original, df_comparison, on='Metric')

df_merged['Delta'] = df_merged['Contextual Compresssion with Document Stuffing'] - df_merged['Baseline']

df_merged