In [None]:
%pip install -qU langchain-text-splitters

In [None]:
%pip install aiofiles

In [None]:
%pip install python-docx

In [None]:
!pip install langchainhub

In [302]:
import os, sys
from dotenv import load_dotenv
rpath = os.path.abspath('/home/user/Documents/10/w11/contract_advisor_rag')

if rpath not in sys.path:
    sys.path.insert(0, rpath)

In [220]:
from typing import AsyncIterator, Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document

In [222]:
file_path = rpath + "/data/Raptor Contract.docx"

In [223]:
from docx import Document as DocxDocument
from typing import Iterator
def lazy_load(file_path) -> Iterator[Document]:  # <-- Does not take any arguments
        """A lazy loader that reads a file line by line.

        When you're implementing lazy load methods, you should use a generator
        to yield documents one by one.
        """
        try:
            doc = DocxDocument(file_path)
            line_number = 0
            for para in doc.paragraphs:
                if para.text.strip():  # Skip empty paragraphs
                    yield Document(
                        page_content=para.text,
                        metadata={"line_number": line_number, "source": file_path},
                    )
                    line_number += 1
        except Exception as e:
            print(f"Error reading {file_path}: {str(e)}")

In [224]:
file = lazy_load(file_path)

file

<generator object lazy_load at 0x7ede19565240>

In [225]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200b",  # Zero-width space
        "\uff0c",  # Fullwidth comma
        "\u3001",  # Ideographic comma
        "\uff0e",  # Fullwidth full stop
        "\u3002",  # Ideographic full stop
        "",
    ],
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)

In [226]:
documents = text_splitter.split_documents(file)

In [227]:
documents[0]

Document(metadata={'line_number': 0, 'source': '/home/user/Documents/10/w11/contract_advisor_rag/data/Raptor Contract.docx'}, page_content='STOCK PURCHASE AGREEMENT')

In [304]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
import chromadb

In [229]:
from dotenv import load_dotenv
load_dotenv()

True

In [230]:
api_key=os.getenv("OPENAI_API_KEY")

In [231]:
persist_directory = rpath + "../../data"
persist_directory

'/home/user/Documents/10/w11/contract_advisor_rag../../data'

In [305]:
from backend.service.chroma_db_manager import ChromaDBManager
from backend.service.rag_processor import RAGProcessor

In [233]:
os.path.exists(persist_directory)
      

False

In [234]:
from langchain_chroma import Chroma

In [235]:
db = Chroma.from_documents(documents, OpenAIEmbeddings(model="text-embedding-ada-002"))

In [236]:
db.similarity_search_with_score("Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties")

[(Document(metadata={'line_number': 405, 'source': '/home/user/Documents/10/w11/contract_advisor_rag/data/Raptor Contract.docx'}, page_content='INDIVIDUAL REPRESENTATIONS AND WARRANTIES OF THE SELLERS.'),
  0.2554522454738617),
 (Document(metadata={'line_number': 405, 'source': '/home/user/Documents/10/w11/contract_advisor_rag/data/Raptor Contract.docx'}, page_content='INDIVIDUAL REPRESENTATIONS AND WARRANTIES OF THE SELLERS.'),
  0.2554522454738617),
 (Document(metadata={'line_number': 423, 'source': '/home/user/Documents/10/w11/contract_advisor_rag/data/Raptor Contract.docx'}, page_content='with respect to the Contemplated Transactions for which the Sellers could be liable.'),
  0.25967124104499817),
 (Document(metadata={'line_number': 423, 'source': '/home/user/Documents/10/w11/contract_advisor_rag/data/Raptor Contract.docx'}, page_content='with respect to the Contemplated Transactions for which the Sellers could be liable.'),
  0.25967124104499817)]

In [237]:
from backend.service.query_analysis import QueryAnalyzer,ParaphrasedQuery

In [238]:
# # Usage example:
query_analyzer = QueryAnalyzer()
result = query_analyzer.analyze_query("Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties")
result

[ParaphrasedQuery(paraphrased_query='What are the situations in which Sellers are accountable for a violation of representations and warranties and to what degree?')]

In [239]:
rag = db.as_retriever(
                search_type="similarity_score_threshold", 
                search_kwargs={"score_threshold": 0.5}
            )

In [240]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

In [254]:
template = """
           You are an AI assistant specializing in providing detailed and accurate answers based on the provided context documents. 
Your task is to understand the user's question, retrieve the most relevant information from the context documents, 
and generate a well-informed and concise response. Use the information from the documents to support your answers.

Instructions:
1. Always prioritize information from the provided context documents.
2. If the context documents do not contain the required information, acknowledge it and provide a general answer based on your training data.
3. Ensure your responses are clear, concise, and relevant to the user's question.
4. If there are multiple relevant points, structure your response in a logical order. please respond with 'I don't know':
            {question}
            Context:
            {context}
            """

prompt = ChatPromptTemplate.from_messages([("human", template)])

In [255]:
from langchain_openai import ChatOpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain import hub



In [256]:
llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [257]:
#retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

In [270]:

rag_chain = (
                {"context": itemgetter("question") | rag, "question": itemgetter("question") } 
                | RunnablePassthrough.assign(context=itemgetter("context"))
               
                | {"response": prompt | llm, "context": itemgetter("context") }
            )

In [None]:
rag_chain.invoke({"question": 'Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties'})

In [272]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

question = "What are the approaches to Task Decomposition?"
llm = ChatOpenAI(temperature=0)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=rag, llm=llm
)

from langchain.output_parsers import PydanticToolsParser
llm_with_tools = llm.bind_tools([ParaphrasedQuery])
rag_chain2 = (
                {"context": itemgetter("question") | retriever_from_llm, "question": itemgetter("question")}
                | RunnablePassthrough.assign(context=itemgetter("context"))
               
                | {"response": prompt | llm, "context": itemgetter("context") }
            )

In [None]:
rag_chain.invoke({"question": 'Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties'})

# CohereRerank

In [274]:
COHERE_API_KEY=os.getenv("COHERE_API_KEY")

In [275]:
from langchain.retrievers import ContextualCompressionRetriever, CohereRagRetriever
from langchain.retrievers.document_compressors import CohereRerank

from langchain_cohere import CohereEmbeddings
from langchain_community.chat_models import ChatCohere
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma

In [276]:
question = 'Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties'


In [280]:
cohere_chat_model = ChatCohere(cohere_api_key=COHERE_API_KEY)
cohere_embeddings = CohereEmbeddings(cohere_api_key=COHERE_API_KEY)

In [281]:
cohere_rerank = CohereRerank(cohere_api_key=COHERE_API_KEY)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=cohere_rerank, 
    base_retriever=rag
)

In [282]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_community.llms import Cohere

llm = Cohere(temperature=0)
compressor = CohereRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=rag
)


In [288]:
from langchain.chains import RetrievalQA

chain = RetrievalQA.from_chain_type(
    llm=Cohere(temperature=0), retriever=compression_retriever
)

chain({"query": question})

{'query': 'Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties',
 'result': " The sellers may be held liable for damages resulting from a breach of their representations and warranties in connection with the contemplated transactions if such breach involves fraud, malfeasance, or a material deviation from the prescribed standard of care. \n\nIt is important to carefully review the specific terms of the agreement for an accurate determination of the precise scope of liability and the circumstances that would give rise to a seller's responsibility for a breach of representations and warranties. "}

In [None]:
compressed_docs = compression_retriever.get_relevant_documents(question)
# Print the relevant documents from using the embeddings and reranker
compressed_docs

# RAGAS

In [165]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# generator with openai models
generator_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")
critic_llm = ChatOpenAI(model="gpt-4")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# generate testset
testset = generator.generate_with_langchain_docs(documents, test_size=12, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})

embedding nodes:   0%|          | 0/1178 [00:00<?, ?it/s]

Generating:   0%|          | 0/12 [00:00<?, ?it/s]

In [167]:
test_df = testset.to_pandas()

In [169]:
test_df

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,Who determines the tax withholding for payment...,"[by the Company at the time of payment, will b...",Buyer or the Paying Agent determines the tax w...,simple,"[{'line_number': 240, 'source': '/home/user/Do...",True
1,What types of transactions fall under the cont...,[any Contractual Obligation relating to the ac...,The contractual obligations of an Acquired Com...,simple,"[{'line_number': 365, 'source': '/home/user/Do...",True
2,What requirements must a Seller that is not an...,[Organization. In the case of each Seller tha...,A Seller that is not an individual must be dul...,simple,"[{'line_number': 407, 'source': '/home/user/Do...",True
3,Are there any pending or threatened condemnati...,[No eminent domain or condemnation Action is p...,No eminent domain or condemnation Action is pe...,simple,"[{'line_number': 296, 'source': '/home/user/Do...",True
4,What items will the Sellers' Representative de...,"[to the Sellers’ Representative, a copy of the...",,simple,"[{'line_number': 209, 'source': '/home/user/Do...",True
5,What payments become due upon termination of e...,"[notice, subject to the applicable Legal Requi...",No severance or other payments will become due...,simple,"[{'line_number': 394, 'source': '/home/user/Do...",True
6,Who determines tax withholding if no declarati...,"[by the Company at the time of payment, will b...",Buyer or the Paying Agent determines tax withh...,reasoning,"[{'line_number': 240, 'source': '/home/user/Do...",True
7,What are the acceptable methods for service of...,[Service of Process. Each party hereby (i) co...,The acceptable methods for service of process ...,reasoning,"[{'line_number': 497, 'source': '/home/user/Do...",True
8,Are there any ongoing obligations related to E...,[no Acquired Company has any ongoing obligatio...,"No, there are no ongoing obligations related t...",reasoning,"[{'line_number': 357, 'source': '/home/user/Do...",True
9,What types of transactions are considered cont...,[any Contractual Obligation relating to the ac...,Contractual obligations relating to the acquis...,multi_context,"[{'line_number': 365, 'source': '/home/user/Do...",True


In [None]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

generator = TestsetGenerator.with_openai()

testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})

In [171]:
test_questions = test_df["question"].values.tolist()
test_groundtruths = test_df["ground_truth"].values.tolist()

In [208]:
answers = []
contexts = []

for question in test_questions:
  response = rag_chain.invoke({"question" : question})
  answers.append(response["response"].content)
  contexts.append([context.page_content for context in response["context"]])

In [209]:
from datasets import Dataset

response_dataset = Dataset.from_dict({
    "question" : test_questions,
    "answer" : answers,
    "contexts" : contexts,
    "ground_truth" : test_groundtruths
})

In [212]:
response_dataset[0]

{'question': 'Who determines the tax withholding for payments made by the Buyer or the Paying Agent?',
 'answer': 'The tax withholding for payments made by the Buyer or the Paying Agent is determined by the Buyer or the Paying Agent themselves. They will reasonably determine the tax withholding according to the applicable withholding rate. It is essential for the Payee to provide a Valid Certificate at least three Business Days prior to the Withholding Drop Date to ensure proper withholding.',
 'contexts': ['will be subject to tax withholding as will be reasonably determined by Buyer or the Paying Agent.',
  'according to the applicable withholding rate as reasonably determined by Buyer and the Paying',
  'or the Paying Agent with a Valid Certificate at least three Business Days prior to the Withholding',
  'Days prior to the Withholding Drop Date. If a Payee (i) does not provide Buyer or the Paying Agent'],
 'ground_truth': 'Buyer or the Paying Agent determines the tax withholding for

In [214]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
]

In [216]:
results = evaluate(response_dataset, metrics)
results

Evaluating:   0%|          | 0/60 [00:00<?, ?it/s]

{'faithfulness': 0.6764, 'answer_relevancy': 0.9845, 'context_recall': 0.8056, 'context_precision': 0.8125, 'answer_correctness': 0.5139}

In [218]:
results_df = results.to_pandas()
results_df

Unnamed: 0,question,answer,contexts,ground_truth,faithfulness,answer_relevancy,context_recall,context_precision,answer_correctness
0,Who determines the tax withholding for payment...,The tax withholding for payments made by the B...,[will be subject to tax withholding as will be...,Buyer or the Paying Agent determines the tax w...,0.333333,1.0,1.0,1.0,0.530065
1,What types of transactions fall under the cont...,The types of transactions that fall under the ...,"[Transactions by the Acquired Companies., Acqu...",The contractual obligations of an Acquired Com...,0.75,1.0,0.0,1.0,0.237286
2,What requirements must a Seller that is not an...,To be considered duly organized and in good st...,[Organization. In the case of each Seller tha...,A Seller that is not an individual must be dul...,0.5,0.99719,1.0,1.0,0.451938
3,Are there any pending or threatened condemnati...,Based on the information provided in the conte...,[is party that affects any of the Real Propert...,No eminent domain or condemnation Action is pe...,0.5,0.999999,1.0,0.0,0.230527
4,What items will the Sellers' Representative de...,"At the Closing, the Sellers' Representative wi...","[in this Agreement, the Buyer shall deliver or...",,0.333333,0.982013,0.0,0.0,0.177197
5,What payments become due upon termination of e...,Upon termination of employment or engagement o...,[termination of the employment or engagement o...,No severance or other payments will become due...,1.0,1.0,1.0,1.0,0.655194
6,Who determines tax withholding if no declarati...,If no declaration is provided before payment t...,[Company’s payroll payment system. If such dec...,Buyer or the Paying Agent determines tax withh...,1.0,0.985792,1.0,0.75,0.839584
7,What are the acceptable methods for service of...,The acceptable methods for service of process ...,[with this Agreement in any manner permitted b...,The acceptable methods for service of process ...,0.5,0.970649,1.0,1.0,0.494173
8,Are there any ongoing obligations related to E...,Based on the information provided in the conte...,[no Acquired Company has any ongoing obligatio...,"No, there are no ongoing obligations related t...",1.0,0.986471,1.0,1.0,0.541222
9,What types of transactions are considered cont...,The types of transactions that are considered ...,[any Contractual Obligation relating to the ac...,Contractual obligations relating to the acquis...,0.6,0.945889,1.0,1.0,0.701617


# AUTOGEN

In [312]:
from backend.service.chroma_db_manager import ChromaDBManager
from backend.service.rag_processor import RAGProcessor
from backend.controller import Controller as controller

ImportError: cannot import name 'Controller' from 'backend.controller' (unknown location)

In [310]:
controller.init_process()

NameError: name 'controller' is not defined

In [None]:
controller.generate_prompt()

In [None]:
controller.evaluate()