In [1]:
# Optional: LangSmith API keys
import os
from dotenv import load_dotenv

load_dotenv(override=True)

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

## Document Loading

In [2]:
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

loader = DirectoryLoader('../data/robinson', show_progress=True)
documents = loader.load()


100%|██████████| 1/1 [00:03<00:00,  3.48s/it]


In [3]:
# from langchain_community.document_loaders import PyPDFLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter

# loader = PyPDFLoader("../data/Robinson Advisory.pdf")
# pages = loader.load()
# for page in pages:
#     # just change the page_content to remove the newlines inserted instead of spaces
#     page.page_content = page.page_content.replace('\n', ' ')
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# chunks = text_splitter.split_documents(pages)

## Splitting

In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
print(len(chunks))
chunks

21


[Document(page_content='ADVISORY SERVICES AGREEMENT\n\nThis Advisory Services Agreement is entered into as of June 15th, 2023 (the “Effective Date”), by and between Cloud Investments Ltd., ID 51-426526-3, an Israeli company (the "Company"), and Mr. Jack Robinson, Passport Number 780055578, Israel, Email: jackrobinson@gmail.com ("Advisor").\n\nresiding\n\nat 1 Rabin st, Tel Aviv,\n\nWhereas,\n\nAdvisor has expertise and/or knowledge and/or relationships, which are relevant to the Company’s business and the Company has asked Advisor to provide it with certain Advisory services, as described in this Agreement; and\n\nWhereas,\n\nAdvisor has agreed to provide the Company with such services, subject to the terms set forth in this Agreement.\n\nNOW THEREFORE THE PARTIES AGREE AS FOLLOWS:\n\n1. Services:\n\n1.1', metadata={'source': '../data/robinson/Robinson Advisory.pdf'}),
 Document(page_content='Whereas,\n\nAdvisor has agreed to provide the Company with such services, subject to the terms

In [5]:
print(max([len(chunk.page_content) for chunk in chunks]))

965


## Getting the number of characters for each chunk

In [6]:
for chunk in chunks:
    print("The chunk has %s characters" % len(chunk.page_content))

The chunk has 771 characters
The chunk has 753 characters
The chunk has 618 characters
The chunk has 812 characters
The chunk has 965 characters
The chunk has 583 characters
The chunk has 836 characters
The chunk has 922 characters
The chunk has 774 characters
The chunk has 791 characters
The chunk has 550 characters
The chunk has 691 characters
The chunk has 924 characters
The chunk has 479 characters
The chunk has 880 characters
The chunk has 728 characters
The chunk has 728 characters
The chunk has 616 characters
The chunk has 924 characters
The chunk has 711 characters
The chunk has 863 characters


## Vector Store

In [7]:
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings

model_name = "BAAI/bge-m3"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

embedding_function = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    # model_kwargs={'device': 'cuda'},
    encode_kwargs=encode_kwargs
)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# storing the chunks in a vector store
from langchain_community.vectorstores.faiss import FAISS

# FAISS.from_documents(pages, model)
# save to disk
db = FAISS.from_documents(chunks, embedding_function)
db.save_local("../data/faiss_index")


In [27]:
retriever = db.as_retriever(search_kwargs={"k": 5})

In [29]:
docs = retriever.get_relevant_documents("What are the payments to the Advisor under the Agreement?")
print(docs)

[Document(page_content='- 3-\n\nconstitute salary payments, and 40% of such payments shall constitute payment by the Company for all other Advisor statutory rights and benefits as employee of the Company throughout the Term. Advisor further consents that the Company may offset any amounts due to him under this Section from any amounts payable to Advisor under this Agreement. Advisor shall indemnify the Company for any loss or expenses incurred by the Company if it were determined that an alleged employer/employee relationship existed between the Advisor and the Company.', metadata={'source': '../data/robinson/Robinson Advisory.pdf'}), Document(page_content='6. Compensation:\n\n6.1\n\nAs full and sole consideration for the Services, and subject to the performance of the Services, the Company shall pay the Advisor, against an invoice provided to the Company by Advisor, hourly fees at a rate of USD 9 (nine) per Billable Hour as defined below, limited to a maximum of USD 1,500 per month (t

## RAG Chain

In [9]:
from langchain_community.chat_models import ChatOllama

# Local LLM
ollama_llm = "mistral:instruct"
model_local = ChatOllama(model=ollama_llm)

In [11]:
# loading the db
vector_store = FAISS.load_local("../data/faiss_index", embedding_function)
retriever= vector_store.as_retriever(search_kwargs={"k": 5})

## History Aware Retriever
##### If there is no chat_history, then the input is just passed directly to the retriever. If there is chat_history, then the prompt and LLM will be used to generate a search query. That search query is then passed to the retriever.

In [12]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

prompt = ChatPromptTemplate.from_messages([
      MessagesPlaceholder(variable_name="chat_history"),
      ("user", "{input}"),
      ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
    ])
    
retriever_chain = create_history_aware_retriever(model_local, retriever, prompt)

In [14]:
prompt = ChatPromptTemplate.from_messages([
      ("system", "Answer the user's questions based on the below context:\n\n{context}"),
      MessagesPlaceholder(variable_name="chat_history"),
      ("user", "{input}"),
    ])
    
stuff_documents_chain = create_stuff_documents_chain(model_local,prompt)
    
conversation_rag_chain = create_retrieval_chain(retriever_chain, stuff_documents_chain)

In [18]:
conversation_rag_chain.invoke({
        "chat_history": [''],
        "input": 'Who are the parties to the Agreement and what are their defined names?'
    })

{'chat_history': [''],
 'input': 'Who are the parties to the Agreement and what are their defined names?',
 'context': [Document(page_content='IP: Any Work Product, upon creation, shall be fully and exclusively owned by the Company. The Advisor, immediately upon Company’s request, shall sign any document and/or perform any action needed to formalize such ownership. The Advisor shall not obtain any rights in the Work Product, including moral rights and/or rights for royalties or other consideration under any applicable law (including Section 134 of the Israeli Patent Law – 1967 if applicable), and shall not be entitled to any compensation with respect to the Services, which was not specifically agreed, in writing, between the Advisor and the Company.', metadata={'source': '../data/robinson/Robinson Advisory.pdf'}),
  Document(page_content='1. Definitions: (a) Company’s Business: development of an AI-based contract assistant. (b) Services: Software development services and/or any other s

## Normal Chain

In [45]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


# Prompt
template = """Answer the question (by giving reference to the context you used) based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)


In [46]:
# Chain
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model_local
    | StrOutputParser()
)

In [51]:
chain.invoke('Who are the parties to the Agreement and what are their defined names?')

' The parties to the Agreement are Cloud Investments Ltd., identified as "the Company," and Mr. Jack Robinson, identified as "Advisor." These definitions are provided in the context of the first document, specifically in the line "This Advisory Services Agreement is entered into as of June 15th, 2023, by and between Cloud Investments Ltd., ID 51-426526-3, an Israeli company (the "Company"), and Mr. Jack Robinson, Passport Number 780055578, Israel, Email: jackrobinson@gmail.com ("Advisor")."'

In [52]:
chain.invoke('What is the termination notice?')

' According to the context provided, either party can terminate this Agreement with fourteen (14) days’ prior written notice, as stated in sections 4 and 5 of the documents from the Robinson Advisory agreement. The Company may also terminate the Agreement immediately and without prior notice if Advisor refuses or is unable to perform the Services, or is in breach of any provision of this Agreement. (References: Document 1 page 2-4, Document 2 page 2-4)'

In [47]:
chain.invoke("What are the payments to the Advisor under the Agreement?")

" The Advisor is entitled to hourly fees at a rate of USD 9 per Billable Hour, with a maximum limit of USD 1,500 per month, and an additional USD 100 per month for workspace expenses, as mentioned in Document 6.1 of the context. There are no other express provisions for additional fees or expense reimbursement. (Document 6.1, metadata={'source': '../data/robinson/Robinson Advisory.pdf'}). The Advisor is also responsible for any income taxes or assessments, expenses, and costs related to their services as mentioned in Document 6.4. (Document 6.4, metadata={'source': '../data/robinson/Robinson Advisory.pdf'})"

In [53]:
chain.invoke('Can the Agreement or any of its obligations be assigned?')

" According to the provided context from the documents, the Agreement may only be assigned by the Company to a successor of all or substantially all of its assets or business, and the assignee must have assumed the Company's obligations under this Agreement. The Advisor may not assign the Agreement to anyone for any reason whatsoever. (Reference: Document 1, page_content under 'Entire Agreement; No Waiver or Assignment')."

In [54]:
chain.invoke('Who owns the IP?')

' According to the context provided, the Company owns the IP (Intellectual Property) upon creation, and the Advisor is required to transfer ownership to the Company by signing any necessary documents and performing any required actions. The Advisor holds no rights in the Work Product, including moral rights or royalties, and is not entitled to any compensation for services not specifically agreed to in writing between the Advisor and the Company. (References: Document 0, page_content)'

In [55]:
chain.invoke('Is there a non-compete obligation to the Advisor?')

" Yes, according to the context provided in Document 1, Section 5 of the agreement imposes a non-compete obligation on the Advisor during the term of engagement with the Company and for a period of 12 months after the engagement has ended. The Advisor is prohibited from being involved in any business that competes with the Company's Business, and from soliciting or hiring employees or service providers of the Company without prior written consent."

In [49]:
chain.invoke('Can the Advisor charge for meal time?')

' Based on the provided context, the answer is no. The term "Billable Hour" refers to the net time devoted to providing the Services without calculating meals, travels, or any other overhead time borne by the Advisor (Documents 6.1 and 6.2). Therefore, meal time is not included in the billable hours for which the Advisor can charge.'

In [48]:
chain.invoke('In which street does the Advisor live?')


' The Advisor lives on Rabin Street in Tel Aviv.'

In [50]:
chain.invoke('Is the Advisor entitled to social benefits?')

' Based on the context provided in the given documents from the Robinson Advisory Agreement, the Advisor is not entitled to social benefits from the Company. The documents state that the sole relationship between the Company and the Advisor shall be that of independent contractors, and the Advisor shall not receive or be entitled to employment benefits such as insurance, paid vacation, or severance payments from the Company (Document 3).'

In [56]:
chain.invoke('What happens if the Advisor claims compensation based on employment relationship with the Company?')

" According to the context provided, if it is adjudicated or determined by any governmental authority that the Advisor and/or anyone on Advisor's behalf is an employee of the Company, despite the parties' agreement stating they are independent contractors, then payments to the Advisor under the Agreement shall be reduced retroactively as of the beginning of the term so that 60% of such payments shall constitute salary payments, and the remaining 40% shall constitute payment for all other Advisor statutory rights and benefits (Document 1 and Document 2). Additionally, the Advisor is required to indemnify the Company for any loss or expenses incurred by the Company due to such determination (Document 1)."