# Import necessary libraries

In [8]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import Docx2txtLoader
import os

In [9]:

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

In [10]:
import openai
openai_api_key=openai.api_key = os.environ['OPENAI_API_KEY']

# Load file

In [11]:
document_path = "../data/Evaluation Sets/Robinson Advisory.docx"
loader = Docx2txtLoader(document_path)
documents = loader.load()

In [12]:
documents

[Document(page_content='- 2-\n\n\n\nADVISORY SERVICES AGREEMENT\n\n\n\nThis Advisory Services Agreement is entered into as of June 15th, 2023 (the “Effective Date”), by and between Cloud Investments Ltd., ID 51-426526-3, an Israeli company (the "Company"), and Mr. Jack Robinson, Passport Number 780055578, residing at 1 Rabin st, Tel Aviv, Israel, Email: jackrobinson@gmail.com ("Advisor").\n\n\n\nWhereas,\tAdvisor has expertise and/or knowledge and/or relationships, which are relevant to the Company’s business and the Company has asked Advisor to provide it with certain Advisory services, as described in this Agreement; and\n\nWhereas, \tAdvisor has agreed to provide the Company with such services, subject to the terms set forth in this Agreement.\n\n\n\nNOW THEREFORE THE PARTIES AGREE AS FOLLOWS:\n\n\n\nServices:  \n\nAdvisor shall provide to the Company, as an independent contractor, software development services, and / or any other services as agreed by the parties from time to time 

# Segmenting the document into segments:

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 700,
    chunk_overlap = 50
)
documents = text_splitter.split_documents(documents)

In [None]:
documents

In [15]:
from langchain.embeddings.openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(
            model='text-embedding-ada-002',
            api_key=openai_api_key
        )

  warn_deprecated(


In [16]:
from langchain_community.vectorstores import FAISS
vector_store = FAISS.from_documents(documents, embeddings)

In [17]:
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
# texts = text_splitter.split_documents(documents)

In [18]:
# embeddings = OpenAIEmbeddings()
# docsearch = Chroma.from_documents(texts, embeddings)

# Retrieval Chains:

In [19]:
retriever=vector_store.as_retriever(search_kwargs={"k": 1})

In [20]:
query = "What is the termination notice?"
docs = retriever.get_relevant_documents(query)
print(docs)

[Document(page_content='Term: The term of this Agreement shall commence on the Effective Date and shall continue until terminated in accordance with the provisions herein (the "Term").  \n\n\n\n\t\tTermination: Either party, at any given time, may terminate this Agreement, for any reason whatsoever, with or without cause, upon fourteen (14) days’ prior written notice. Notwithstanding the above, the Company may terminate this Agreement immediately and without prior notice if Advisor refuses or is unable to perform the Services, or is in breach of any provision of this Agreement. \n\n\n\n\t\tCompensation:', metadata={'source': '../data/Evaluation Sets/Robinson Advisory.docx'})]


In [21]:
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever)


  warn_deprecated(


In [22]:
query = "Who are the parties to the Agreement and what are their defined names?"
qa.run(query)

  warn_deprecated(


' The parties to the Agreement are Cloud Investments Ltd. (defined as "Company") and Advisor (defined as "Advisor").'

In [23]:
query = "What is the termination notice?"
qa.run(query) 


"\nThe termination notice is 14 days' prior written notice."

In [24]:
def pretty_print_docs(docs):
    print(
        f"\n{'-' * 100}\n".join(
            [f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]
        )
    )

In [25]:

query = "What is the termination notice?"
docs = retriever.get_relevant_documents(query)
pretty_print_docs(docs)



Document 1:

Term: The term of this Agreement shall commence on the Effective Date and shall continue until terminated in accordance with the provisions herein (the "Term").  



		Termination: Either party, at any given time, may terminate this Agreement, for any reason whatsoever, with or without cause, upon fourteen (14) days’ prior written notice. Notwithstanding the above, the Company may terminate this Agreement immediately and without prior notice if Advisor refuses or is unable to perform the Services, or is in breach of any provision of this Agreement. 



		Compensation:


In [26]:
response = qa.run(query)
response

" The termination notice is fourteen (14) days' prior written notice."

# RAGAS

In [27]:
json_file="../Context/context.json"


In [28]:
import json

json_file = "../Context/context.json"

with open(json_file, "r") as f:
    data = json.load(f)

questions, ground_truth = [], []

for item in data:
    if "question" in item:  # Check if "question" key exists
        questions.append(item["question"])
    if "answer" in item:  # Check if "answer" key exists
        ground_truth.append(item["answer"])

print("Questions:")
for question in questions:
    print(question)

print("\nGround Truth:")
for answer in ground_truth:
    print(answer)


Questions:
WHO ARE THE PARTIES TO THE AGREEMENT AND WHAT ARE THEIR DEFINED NAMES?
WHAT IS THE TERMINATION NOTICE?
What are the payments to the Advisor under the Agreement?

Ground Truth:
CLOUD INVESTMENTS LTD. ("COMPANY") AND JACK ROBINSON ("ADVISOR")
According to section 4: 14 days for convenience by both parties. The Company may terminate without notice if the Advisor refuses or cannot perform the Services or is in breach of any provision of this Agreement.
According to section 6: 1. Fees of $9 per hour up to a monthly limit of $1,500, 2. Workspace expense of $100 per month, 3. Other reasonable and actual expenses if approved by the company in writing and in advance.


In [29]:
import json

json_file = "../Context/context.json"  # Replace with your actual path

with open(json_file, "r") as f:
    data = json.load(f)

questions = []  # List to store extracted questions
for item in data:
    if "question" in item:
        questions.append(item["question"])


In [30]:
context = []
for question in questions:
    # Preprocess question (e.g., lowercase, remove punctuation)
    processed_question = question.lower().strip()

    # Call retriever function with processed question
    relevant_docs = retriever.get_relevant_documents(processed_question)

    # Extract and store only page_content
    context.extend([doc.page_content for doc in relevant_docs])

# Now the context list contains only page_content strings
print(context)


['Governing Law and Jurisdiction:  This Agreement shall be governed by the laws of the State of Israel, without giving effect to the rules respecting conflicts of laws. The parties consent to the exclusive jurisdiction and venue of Tel Aviv courts for any lawsuit filed arising from or relating to this Agreement.  \t\n\n\n\nNotices: Notices under this Agreement shall be delivered to the party’s email address as follows: Company: info@cloudcorp.com, Advisor: jackrobinson@gmail.com, or in any the other means with a proof of acceptance by the other party.\n\n\n\nIN WITNESS WHEREOF the parties have executed this Agreement as of the date first above written.\n\n\n\n\n\n\n\nCloud Investments Ltd.\t\t\t\tAdvisor', 'Term: The term of this Agreement shall commence on the Effective Date and shall continue until terminated in accordance with the provisions herein (the "Term").  \n\n\n\n\t\tTermination: Either party, at any given time, may terminate this Agreement, for any reason whatsoever, with o

In [31]:
# context = []
# for question in questions:
#     # Preprocess question (e.g., lowercase, remove punctuation)
#     processed_question = question.lower().strip()

#     # Call retriever function with processed question
#     relevant_docs = retriever.get_relevant_documents(processed_question)
#     context.extend(relevant_docs)
#     # pretty_print_docs(relevant_docs)
# context

   


In [32]:
answers = []  # Create an empty list to store answers for each question
for question in questions:
    # Preprocess question (e.g., lowercase, remove punctuation)
    processed_question = question.lower().strip()

    # Call retriever function with processed question and context (if needed)
    answer = qa.run(processed_question)  # Include context if necessary

    # Append answer to the answers list
    answers.append(answer)

answers  # This will print a list of answers for each question


[' The parties to the agreement are Cloud Investments Ltd. and the Advisor. Their defined names are "Company" and "Advisor," respectively.',
 ' The termination notice is fourteen (14) days.',
 ' The payments to the advisor under the agreement are reduced retroactively so that 60% is allocated as salary payments and 40% is for other statutory rights and benefits as an employee of the company throughout the term. The company also has the right to offset any amounts due to the advisor from any payments made under the agreement. Additionally, the advisor is responsible for indemnifying the company for any losses or expenses incurred if an alleged employer/employee relationship is found to exist between them.']

In [33]:
print(type(answers))

<class 'list'>


In [34]:
context_lists = []

for question in questions:
    # Preprocess question (e.g., lowercase, remove punctuation)
    processed_question = question.lower().strip()

    # Call retriever function with processed question
    relevant_docs = retriever.get_relevant_documents(processed_question)

    # Create a new list for this question's context
    context_for_question = []

    # Extract and store only page_content (assuming Document has a page_content attribute)
    context_for_question.extend([doc.page_content for doc in relevant_docs])

    # Append the question's context list with only page_content to the main list
    context_lists.append(context_for_question)

# Now context_lists contains lists of page_content strings for each question

context_lists



[['Governing Law and Jurisdiction:  This Agreement shall be governed by the laws of the State of Israel, without giving effect to the rules respecting conflicts of laws. The parties consent to the exclusive jurisdiction and venue of Tel Aviv courts for any lawsuit filed arising from or relating to this Agreement.  \t\n\n\n\nNotices: Notices under this Agreement shall be delivered to the party’s email address as follows: Company: info@cloudcorp.com, Advisor: jackrobinson@gmail.com, or in any the other means with a proof of acceptance by the other party.\n\n\n\nIN WITNESS WHEREOF the parties have executed this Agreement as of the date first above written.\n\n\n\n\n\n\n\nCloud Investments Ltd.\t\t\t\tAdvisor'],
 ['Term: The term of this Agreement shall commence on the Effective Date and shall continue until terminated in accordance with the provisions herein (the "Term").  \n\n\n\n\t\tTermination: Either party, at any given time, may terminate this Agreement, for any reason whatsoever, wi

In [35]:
ground_truths_lists = [[item] for item in ground_truth]

# Print the modified list of lists
print("Ground Truths (List of Lists):")
for sublist in ground_truths_lists:
    print(sublist)

Ground Truths (List of Lists):
['CLOUD INVESTMENTS LTD. ("COMPANY") AND JACK ROBINSON ("ADVISOR")']
['According to section 4: 14 days for convenience by both parties. The Company may terminate without notice if the Advisor refuses or cannot perform the Services or is in breach of any provision of this Agreement.']
['According to section 6: 1. Fees of $9 per hour up to a monthly limit of $1,500, 2. Workspace expense of $100 per month, 3. Other reasonable and actual expenses if approved by the company in writing and in advance.']


In [36]:
print(type(ground_truths_lists))

<class 'list'>


In [37]:
from datasets import Dataset
data = {
    "question": questions, # list 
    "answer": answers, # list
    "contexts": context_lists, # list list
    "ground_truths": ground_truths_lists # list Lists
}

# Convert dict to dataset
dataset = Dataset.from_dict(data)

  from .autonotebook import tqdm as notebook_tqdm


# Evaluation

In [38]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)

result = evaluate(
    dataset = dataset, 
    metrics=[
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy,
        
    ],
)

df = result.to_pandas()

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 12/12 [00:06<00:00,  1.87it/s]


In [39]:
df

Unnamed: 0,question,answer,contexts,ground_truths,ground_truth,context_precision,context_recall,faithfulness,answer_relevancy
0,WHO ARE THE PARTIES TO THE AGREEMENT AND WHAT ...,The parties to the agreement are Cloud Invest...,[Governing Law and Jurisdiction: This Agreeme...,"[CLOUD INVESTMENTS LTD. (""COMPANY"") AND JACK R...","CLOUD INVESTMENTS LTD. (""COMPANY"") AND JACK RO...",1.0,1.0,,0.934953
1,WHAT IS THE TERMINATION NOTICE?,The termination notice is fourteen (14) days.,[Term: The term of this Agreement shall commen...,[According to section 4: 14 days for convenien...,According to section 4: 14 days for convenienc...,1.0,1.0,1.0,0.87681
2,What are the payments to the Advisor under the...,The payments to the advisor under the agreeme...,[payments to Advisor hereunder shall be reduce...,[According to section 6: 1. Fees of $9 per hou...,According to section 6: 1. Fees of $9 per hour...,0.0,0.0,1.0,0.958198
