In [2]:
! pip install langchain chromadb openai PyMuPDF tiktoken



In [3]:
! pip install -U langchain-openai



In [4]:
import fitz  # PyMuPDF

def extract_text_from_pdf(file_path):
    text = ""
    with fitz.open(file_path) as doc:
        for page in doc:
            text += page.get_text()
    return text

pdf_text_1 = extract_text_from_pdf("../data/Raptor.pdf")
pdf_text_2 = extract_text_from_pdf("../data/robinson.pdf")

texts = [pdf_text_1, pdf_text_2]


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

chunk_size = 1000
chunk_overlap = 0

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

chunks = []
for text in texts:
    chunks.extend(text_splitter.split_text(text))


In [6]:
print(f"\nTotal chunks: {len(chunks)}")


Total chunks: 248


In [12]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.docstore.document import Document

In [13]:
persist_directory = 'db'
embedding = OpenAIEmbeddings()
documents = [Document(page_content=chunk) for chunk in chunks]

vectordb = Chroma.from_documents(documents=documents, embedding=embedding, persist_directory=persist_directory)


In [14]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [19]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

retriever = vectordb.as_retriever()
llm = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=os.getenv("OPENAI_API_KEY"))

rag_chain = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=retriever)

def rag_qa(query):
    response = rag_chain.invoke(query)
    return response


Robinson QnA

In [28]:
query = "In which street does the Advisor live?"
answer = rag_qa(query)
print(answer)

The Advisor, Mr. Jack Robinson, resides at 1 Rabin St, Tel Aviv, Israel.


In [29]:
query = "Who owns the IP?"
answer = rag_qa(query)
print(answer)

Based on the provided context, it appears that the Acquired Companies own the Intellectual Property (IP). The text states that no person has interfered with, infringed upon, diluted, misappropriated, or violated any Company Intellectual Property, and no Acquired Company has made any claims against others regarding interference, infringement, dilution, misappropriation, or violation of their Intellectual Property. Additionally, the Acquired Companies have maintained commercially reasonable practices to protect their confidential information and trade secrets, requiring employees and contractors to maintain confidentiality and use the information solely for the benefit of the Acquired Companies.


In [18]:
query = "Who are the parties to the Agreement and what are their defined names?"
answer = rag_qa(query)
print(answer)

{'query': 'Who are the parties to the Agreement and what are their defined names?', 'result': 'The parties to the Agreement are:\n\n1. **Buyer** - Referred to as "BUYER" in the Agreement.\n2. **The Company** - Referred to as "COMPANY" in the Agreement.\n3. **Sellers’ Representative** - Referred to as "SELLERS’ REPRESENTATIVE" in the Agreement.\n4. **The Sellers** - Referred to as "SELLERS" in the Agreement.\n\nThese are the defined names used throughout the Agreement.'}


In [20]:
query = "What is the termination notice according to section 4 of the Robinson contract?"
answer = rag_qa(query)
print(answer)

  warn_deprecated(


The termination notice according to the provided context is thirty (30) days or shorter prior notice, subject to the applicable Legal Requirements and Contractual Obligations.


In [21]:
query = "What are the payments to the Advisor under the Agreement?"
answer = rag_qa(query)
print(answer)

Under the Agreement, the payments to the Advisor are as follows:

1. **Hourly Fees**: The Advisor will be paid USD 9 per Billable Hour, with a maximum limit of USD 1,500 per month.

2. **Workspace Expense**: The Advisor will receive an additional USD 100 per month to finance a professional workspace, provided that the Advisor actually hires such a workspace.

The Advisor is not entitled to any additional fees or expense reimbursements beyond what is explicitly stated in the Agreement.


Raptor QnA

In [23]:
query = "How much is the escrow amount in the Raptor contract?"
answer = rag_qa(query)
print(answer)

The provided context does not specify the exact amount of the escrow in the Raptor contract. If you need to know the exact amount, you may need to refer to the Escrow Agreement or other relevant sections of the contract that detail the financial specifics.


In [24]:
query = "Does the Buyer need to pay the Employees Closing Bonus Amount directly to the Company’s employees?"
answer = rag_qa(query)
print(answer)

No, the Buyer does not need to pay the Employees Closing Bonus Amount directly to the Company’s employees. According to the provided context, at the Closing, the Buyer shall deposit the Employee Closing Bonus Amount with the Company, and the Company will then pay the bonuses through its payroll system to the individuals listed as Employees Closing Bonus Beneficiaries.


In [25]:
query = "Whose consent is required for the assignment of the Agreement by the Buyer?"
answer = rag_qa(query)
print(answer)

Based on the provided context, the Buyer does not need to seek consent to assign the Agreement, provided that the assignment is to one or more of its Affiliates, or to any purchaser of all or substantially all its assets. However, the Buyer must ensure that it is not relieved of any liability or obligations under the Agreement even after the assignment.


In [26]:
query = " Are there any conditions to the closing?"
answer = rag_qa(query)
print(answer)

The provided context does not specify any particular conditions that need to be met for the closing to occur. It mainly outlines the actions taken regarding the cancellation of options, termination of stock plans, and the process for closing payments. If there are specific conditions to the closing, they are not detailed in the excerpts given.
