In [1]:
from dotenv import load_dotenv
import os

# Load variables from the local .env file into the process environment.
load_dotenv()

# Read the API keys from the environment (os.getenv returns None when a key is absent).
api_key = os.getenv("OPENAI_API_KEY")
# NOTE(review): `langchain` holds the LangChain API key string; it is not
# referenced by any other cell visible in this notebook.
langchain = os.getenv("LANGCHAIN_API_KEY")

# Fail fast: every later cell that talks to OpenAI needs this key, and a
# missing key would otherwise only surface as a less obvious error downstream.
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment "
        "before running this notebook."
    )

In [3]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
import pysqlite3 as sqlite3
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from openai import OpenAI


In [4]:
# Load the contract PDF from the data directory and split it into Document objects.
# NOTE(review): `load_and_split()` presumably applies the loader's default text
# splitter, so entries in `pages` may not map one-to-one to PDF pages — confirm
# against the LangChain documentation.
loader = PyPDFLoader("../data/Raptor Contract.pdf")
pages = loader.load_and_split()

In [5]:
# Display the loaded Document objects to eyeball the extracted text.
# NOTE(review): a bare `pages` renders the entire list; consider showing only
# a small slice to keep the notebook output readable.
pages

[Document(page_content='[R&G\nDraft\n12.__.2021]\nSTOCK\nPURCHASE\nAGREEMENT\nBY\nAND\nAMONG\n[BUYER],\n[TARGET\nCOMP ANY],\nTHE\nSELLERS\nLISTED\nON\nSCHEDULE\nI\nHERET O\nAND\nTHE\nSELLERS’\nREPRESENT ATIVE\nNAMED\nHEREIN\nDated\nas\nof\n[●]\n[This\ndocument\nis\nintended\nsolely\nto\nfacilitate\ndiscussions\namong\nthe\nparties\nidentified\nherein. \nNeither\nthis\ndocument\nnor\nsuch\ndiscussions\nare\nintended\nto\ncreate,\nnor\nwill\neither\nor\nboth\nbe \ndeemed\nto\ncreate,\na\nlegally\nbinding\nor\nenforceable\noffer\nor\nagreement\nof\nany\ntype\nor\nnature, \nunless\nand\nuntil\na\ndefinitive\nwritten\nagreement\nis\nexecuted\nand\ndelivered\nby\neach\nof\nthe\nparties \nhereto.\nThis\ndocument\nshall\nbe\nkept\nconfidential\npursuant\nto\nthe\nterms\nof\nthe\nConfidentiality \nAgreement\nentered\ninto\nby\nthe\nparties\nand,\nif\napplicable,\nits\naffiliates\nwith\nrespect\nto\nthe\nsubject \nmatter\nhereof.]\n112923184_5', metadata={'source': '../data/Raptor Contract.pdf',

In [6]:
# Count how many Document objects the loader produced.
len(pages)

76

In [7]:
# Splitter used to cut the loaded text into chunks.
text_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,
    length_function=len,  # chunk length is measured with the built-in len()
    chunk_overlap=20,
    chunk_size=100,  # deliberately tiny chunk size, for demonstration only
)

In [8]:
# Pull the raw text out of every loaded Document in `pages`.
texts = list(map(lambda page: page.page_content, pages))

# Feed those texts to the splitter, which returns the chunks as new Document objects.
chunked_texts = text_splitter.create_documents(texts)



In [9]:
# Display the list of raw text strings extracted from `pages`.
texts

['[R&G\nDraft\n12.__.2021]\nSTOCK\nPURCHASE\nAGREEMENT\nBY\nAND\nAMONG\n[BUYER],\n[TARGET\nCOMP ANY],\nTHE\nSELLERS\nLISTED\nON\nSCHEDULE\nI\nHERET O\nAND\nTHE\nSELLERS’\nREPRESENT ATIVE\nNAMED\nHEREIN\nDated\nas\nof\n[●]\n[This\ndocument\nis\nintended\nsolely\nto\nfacilitate\ndiscussions\namong\nthe\nparties\nidentified\nherein. \nNeither\nthis\ndocument\nnor\nsuch\ndiscussions\nare\nintended\nto\ncreate,\nnor\nwill\neither\nor\nboth\nbe \ndeemed\nto\ncreate,\na\nlegally\nbinding\nor\nenforceable\noffer\nor\nagreement\nof\nany\ntype\nor\nnature, \nunless\nand\nuntil\na\ndefinitive\nwritten\nagreement\nis\nexecuted\nand\ndelivered\nby\neach\nof\nthe\nparties \nhereto.\nThis\ndocument\nshall\nbe\nkept\nconfidential\npursuant\nto\nthe\nterms\nof\nthe\nConfidentiality \nAgreement\nentered\ninto\nby\nthe\nparties\nand,\nif\napplicable,\nits\naffiliates\nwith\nrespect\nto\nthe\nsubject \nmatter\nhereof.]\n112923184_5',
 'TABLE\nOF\nCONTENTS\nARTICLE\nI\nDEFINITIONS;\nCER TAIN\nRULES\nOF\nCO

In [10]:
# Print the chunked Document objects produced by the splitter.
# NOTE(review): this prints every chunk; consider printing only a small slice.
print(chunked_texts)

[Document(page_content='[R&G\nDraft\n12.__.2021]\nSTOCK\nPURCHASE\nAGREEMENT\nBY\nAND\nAMONG\n[BUYER],\n[TARGET\nCOMP ANY],\nTHE\nSELLERS'), Document(page_content='THE\nSELLERS\nLISTED\nON\nSCHEDULE\nI\nHERET O\nAND\nTHE\nSELLERS’\nREPRESENT ATIVE\nNAMED\nHEREIN\nDated\nas\nof'), Document(page_content='HEREIN\nDated\nas\nof\n[●]\n[This\ndocument\nis\nintended\nsolely\nto\nfacilitate\ndiscussions\namong\nthe'), Document(page_content='among\nthe\nparties\nidentified\nherein. \nNeither\nthis\ndocument\nnor\nsuch\ndiscussions\nare\nintended\nto'), Document(page_content='are\nintended\nto\ncreate,\nnor\nwill\neither\nor\nboth\nbe \ndeemed\nto\ncreate,\na\nlegally\nbinding\nor'), Document(page_content='legally\nbinding\nor\nenforceable\noffer\nor\nagreement\nof\nany\ntype\nor\nnature, \nunless\nand\nuntil\na'), Document(page_content='unless\nand\nuntil\na\ndefinitive\nwritten\nagreement\nis\nexecuted\nand\ndelivered\nby\neach\nof\nthe\nparties'), Document(page_content='of\nthe\nparties \nher

In [11]:
from langchain_openai import OpenAIEmbeddings

# Embedding client, given the OpenAI key that was read from the environment.
embeddings_model = OpenAIEmbeddings(openai_api_key=api_key)

In [12]:
# Collect the raw text of every chunked Document.
chunked_texts_strings = list(map(lambda chunk: chunk.page_content, chunked_texts))

# Request one embedding per chunk text.
embeddings = embeddings_model.embed_documents(chunked_texts_strings)

# Report how many embeddings came back and the length of the first one.
print(f"{len(embeddings)} {len(embeddings[0])}")

2828 1536


In [13]:
# Show the first five embeddings (or fewer, if fewer exist), numbered from 1.
for i in range(len(embeddings[:5])):
    embedding = embeddings[i]
    print(f"Embedding {i+1}: {embedding}")

Embedding 1: [-0.01289976032984746, -0.030943052886792687, -0.0026450989144243394, -0.0022289777261215006, -0.023916742289729807, 0.009147847464833265, -0.024612551970771595, -0.004764930064999593, 0.006429415283872209, -0.040056792526980925, 0.007026310597062878, 0.012156199880567138, -0.004307878881763082, -0.00032765288308047653, 0.027409432707209726, 0.012886117386253951, 0.002460914053638133, -0.02008296990050953, -0.013554640248786203, -0.007967699782045434, -0.003861059906221702, 0.017640815010727784, -0.03885617996314157, 0.011815116046181738, -0.004423847161936713, -0.015567029842518452, 0.036864256181783364, -0.028514541871926966, -0.0027866485287430017, 0.02525378801479431, 0.010123344474460848, -0.01442099091569817, -0.007844909564413789, -0.01170596970346612, -0.009802726452449489, -0.02274341747572251, -0.022470551153272206, 0.002801997189531642, 0.010928300498218252, -0.0033357921423726156, 0.014066264137719263, 0.005685854368930627, -0.005447096336786612, 0.009529860129

In [14]:
# Build the Chroma vector store from the chunked documents, reusing the
# embedding client created in the earlier cell rather than constructing a
# second, identically configured one.
db = Chroma.from_documents(chunked_texts, embedding=embeddings_model)

In [15]:
# Expose the vector store as a similarity-search retriever configured with k=8.
retriever = db.as_retriever(
    search_kwargs={"k": 8},
    search_type="similarity",
)

In [91]:
# Question passed to the retriever and then to the chat model.
query = (
    "Under what circumstances and to what extent the Sellers are responsible "
    "for a breach of representations and warranties?"
)


In [92]:
# Run the query through the retriever and keep the returned documents
# for use as context in the chat prompt.
retrieved_docs = retriever.invoke(query)

In [93]:
import os
import openai
from openai import OpenAI

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
# Kept so that any code relying on the module-level `openai` configuration still works.
openai.api_key = api_key

# Pass the key explicitly: a client built with a bare `OpenAI()` takes its key
# from the OPENAI_API_KEY environment variable, not from `openai.api_key`, so
# the assignment above does not configure this client on its own.
openai_client = OpenAI(api_key=api_key)

In [94]:
def rag(query, retrieved_documents, model="gpt-3.5-turbo", client=None):
    """Ask a chat model to answer ``query`` using retrieved documents as context.

    Args:
        query: The question to send to the model.
        retrieved_documents: Iterable of objects exposing a ``page_content``
            string attribute; their contents are joined with blank lines and
            placed in the prompt as the "Information" section.
        model: Name of the chat model to request.
        client: Optional object exposing
            ``chat.completions.create(model=..., messages=...)``. When omitted,
            the notebook-level ``openai_client`` is used, so existing calls
            behave as before.

    Returns:
        The response object returned by ``client.chat.completions.create``,
        unmodified.
    """
    # Resolve the client at call time so the function can be exercised with a
    # stand-in client and does not hard-wire the notebook global.
    if client is None:
        client = openai_client

    # Concatenate the text of every retrieved document, separated by blank lines.
    information = "\n\n".join(doc.page_content for doc in retrieved_documents)
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user",
            "content": f"Question: {query}. \n Information: {information}"
        }
    ]
    return client.chat.completions.create(
        model=model,
        messages=messages,
    )

In [95]:
# Ask the chat model the question, supplying the retrieved documents as context.
output = rag(query=query, retrieved_documents=retrieved_docs)



In [96]:
# `output` is the response object returned by rag(); take the reply text
# from its first choice and print it.
message_content = output.choices[0].message.content
print(message_content)

Under normal circumstances, the Sellers are responsible for any breach of representations and warranties made in a transaction. However, if the breach was a result of or caused by the Sellers' Representative's gross negligence, bad faith, or willful misconduct, then the Sellers' liability may be limited. In such cases, the Sellers' Representative may bear the responsibility for the breach, and the Sellers may be absolved of liability.

It is important to note that if the Sellers' Representative's actions result in any Losses during the transaction, the Sellers may not be held liable for those Losses if it can be proven that the Representative acted in gross negligence, bad faith, or willful misconduct. Additionally, a replacement Sellers' Representative may be appointed if necessary, and the Sellers would not be held liable for any actions or decisions made by the replacement Representative.

In summary, the Sellers may be held responsible for breaches of representations and warranties