In [1]:
import os
from dotenv import load_dotenv

# Run python-dotenv's loader first so that variables defined in a local .env file
# (if one is present) are available through the process environment below.
load_dotenv()

# Look up the OpenAI key in the environment; the result is None when it is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

Setting up the Model


In [5]:
import os
# LangSmith tracing configuration.
#
# The API key is deliberately NOT assigned here. Writing a literal value into
# os.environ['LANGCHAIN_API_KEY'] would overwrite a real key that was already
# loaded into the environment (for example from the .env file read by
# load_dotenv() in the first cell) and would put a credential in the notebook.
# Provide LANGCHAIN_API_KEY through the environment / .env file instead.
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

if os.environ.get('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_TRACING_V2'] = 'true'
else:
    # Without a key the tracing client cannot authenticate, so switch tracing
    # off rather than leaving it enabled with an unusable credential.
    os.environ['LANGCHAIN_TRACING_V2'] = 'false'
    print("LANGCHAIN_API_KEY is not set; LangSmith tracing is disabled.")

In [7]:
# Snapshot of the LangSmith API key as currently held in the process environment
# (None when the variable is not defined).
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")

In [24]:
import os
import bs4
from docx import Document as DocxDocument
from langchain import hub
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Function to load a .docx document
def load_docx_document(file_path):
    """Return the paragraph text of a .docx file as a single string.

    Args:
        file_path: Location of the .docx file; passed unchanged to ``DocxDocument``.

    Returns:
        The ``text`` of every entry in the document's ``paragraphs``, in
        order, joined with newline characters. Only ``paragraphs`` is read;
        nothing else in the document contributes to the result.
    """
    document = DocxDocument(file_path)
    return '\n'.join([paragraph.text for paragraph in document.paragraphs])

#### INDEXING ####

# Locate the contract file.
# NOTE: the path is derived from os.getcwd(), so this assumes the kernel's working
# directory is the "notebooks" directory and that "data" is a sibling directory.
current_working_directory = os.getcwd()
parent_directory = os.path.dirname(current_working_directory)
file_path = os.path.join(parent_directory, "data", "Raptor Contract.docx")

# Fail fast with the resolved path when the working-directory assumption does not
# hold, instead of surfacing a less informative error from the .docx loader.
if not os.path.isfile(file_path):
    raise FileNotFoundError(
        f"Contract file not found at {file_path!r} "
        f"(working directory: {current_working_directory!r}). "
        "Start the kernel from the 'notebooks' directory or adjust file_path."
    )

# Load the .docx document as one plain-text string
docx_text = load_docx_document(file_path)

# Wrap the text in a single LangChain Document so the splitter can consume it
docs = [Document(page_content=docx_text)]

# Split into overlapping chunks (chunk_size=1000, chunk_overlap=200)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed the chunks with OpenAIEmbeddings and store them in a Chroma vector store
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retriever view over the vector store, used by the RAG chain below
retriever = vectorstore.as_retriever()

#### RETRIEVAL and GENERATION ####

# Prompt: fetched by name ("rlm/rag-prompt") through langchain's hub module.
# NOTE(review): this looks like a remote fetch, so the cell presumably needs
# network access at run time — confirm.
prompt = hub.pull("rlm/rag-prompt")

# LLM: OpenAI chat model "gpt-4" with temperature=0
# (presumably chosen to keep answers as repeatable as possible — confirm).
llm = ChatOpenAI(model_name="gpt-4", temperature=0)

# Post-processing
def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a blank
        line (two newline characters). An empty iterable yields ``""``.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# Chain: a pipeline composed with the `|` operator; stages run left to right.
rag_chain = (
    # Build the prompt inputs from the value given to invoke():
    #   "context"  -> retriever piped into format_docs (retrieved docs as one string)
    #   "question" -> RunnablePassthrough() (presumably forwards the input unchanged)
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    # Fill the pulled RAG prompt with those inputs
    | prompt
    # Send the filled prompt to the chat model
    | llm
    # Parse the model output with StrOutputParser (the cells below print the result as text)
    | StrOutputParser()
)




In [25]:

# Question 1: Sellers' liability for breaches of representations and warranties
question = "Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties?"
response = rag_chain.invoke(question)
print(response)

The Sellers' Representative is not held liable for actions or omissions in exercising their power and authority, unless it involves gross negligence, bad faith, or willful misconduct. They are entitled to rely on the advice of experts and will not be liable for any action taken in good faith based on such advice. Each Seller will indemnify the Sellers’ Representative from any losses, except for those caused by the Sellers’ Representative’s gross negligence, bad faith or willful misconduct.


In [26]:
# Question 2: size of the escrow
question = "How much is the escrow amount?"
response = rag_chain.invoke(question)
print(response)

The context provided does not give information on the specific escrow amount.


In [29]:
# Question 3: conditions to closing
# (the trailing space inside the question string is kept exactly as originally asked)
question = "Are there any conditions to the closing? "
response = rag_chain.invoke(question)
print(response)


Yes, there are conditions to the closing. The acquired company will not be required to include any amount in taxable income or exclude any item of deduction or loss from taxable income for any taxable period ending after the Closing Date. This is due to various factors such as any "closing agreement", any deferred intercompany gain or excess loss account, installment sale or open transaction disposition made on or prior to the Closing Date, and any prepaid amount received on or prior to the Closing Date other than prepayments by customers.
