In [6]:
from langchain.document_loaders import UnstructuredWordDocumentLoader
import csv
import re
import os
import openai
import pandas as pd
import nest_asyncio

from dotenv import load_dotenv

In [7]:
# Load environment variables via python-dotenv, then read the OpenAI key from
# the process environment (None when the variable is not set).
load_dotenv()
api_key = os.environ.get('OPENAI_API_KEY')

In [8]:
from docx import Document

# Relative to the notebook's working directory (same convention as the CSV
# path '../data/contra1.csv') so the notebook is not tied to one machine's
# home directory.
docx_file_path = '../data/Robinson Advisory.docx'

def load_document(docx_file_path):
    """Return the text of every paragraph in a .docx file, one per line.

    Each paragraph's text is followed by a newline, so a document with no
    paragraphs yields an empty string.
    """
    paragraphs = Document(docx_file_path).paragraphs
    return "".join(para.text + "\n" for para in paragraphs)

doc_text = load_document(docx_file_path)

In [9]:
# Sanity check: number of characters extracted from the document.
len(doc_text)

14159

In [10]:
# Load the question/answer pairs from CSV.
df = pd.read_csv('../data/contra1.csv')
# Display the raw frame; the recorded output shows empty "Unnamed: 2..5" columns.
df

Unnamed: 0,Questions,Answers,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5
0,Who are the parties to the Agreement and what ...,Cloud Investments Ltd. (“Company”) and Jack Ro...,,,,
1,What is the termination notice?,According to section 4:14 days for convenience...,,,,
2,What are the payments to the Advisor under the...,According to section 6: 1. Fees of $9 per hour...,,,,
3,Can the Agreement or any of its obligations b...,1. Under section 1.1 the Advisor can’t assign ...,,,,
4,Who owns the IP?,According to section 4 of the Undertaking (Ap...,,,,
5,Is there a non-compete obligation to the Advisor?,Yes. During the term of engagement with the Co...,,,,
6,Can the Advisor charge for meal time?,"No. See Section 6.1, Billable Hour doesn’t inc...",,,,
7,In which street does the Advisor live?,"1 Rabin st, Tel Aviv, Israe",,,,
8,Is the Advisor entitled to social benefits?,"No. According to section 8 of the Agreement, t...",,,,
9,What happens if the Advisor claims compensatio...,If the Advisor is determined to be an employee...,,,,


In [11]:
# Drop the empty spill-over columns. Reassigning instead of inplace=True, plus
# errors='ignore', keeps this cell safe to re-run: a second execution no longer
# raises KeyError because the columns are already gone.
df = df.drop(columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5'], errors='ignore')

In [12]:
# Preview the first rows after the drop; only Questions and Answers should remain.
df.head()

Unnamed: 0,Questions,Answers
0,Who are the parties to the Agreement and what ...,Cloud Investments Ltd. (“Company”) and Jack Ro...
1,What is the termination notice?,According to section 4:14 days for convenience...
2,What are the payments to the Advisor under the...,According to section 6: 1. Fees of $9 per hour...
3,Can the Agreement or any of its obligations b...,1. Under section 1.1 the Advisor can’t assign ...
4,Who owns the IP?,According to section 4 of the Undertaking (Ap...


In [13]:

# NOTE(review): presumably needed so code that drives its own asyncio event loop
# can run inside Jupyter's already-running loop — confirm it is still required.
nest_asyncio.apply()

In [21]:
from langchain.document_loaders import UnstructuredWordDocumentLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# Path to the contract, relative to the notebook's working directory (same
# convention as the CSV path '../data/contra1.csv') rather than an absolute
# path that only exists on one machine.
docx_file_path = '../data/Robinson Advisory.docx'

# Loader for the .docx file
loader = UnstructuredWordDocumentLoader(file_path=docx_file_path)

# Build a vector-store index from the loader, using VectorstoreIndexCreator's defaults
index = VectorstoreIndexCreator().from_loaders([loader])

# Chat model with temperature=0
llm = ChatOpenAI(temperature=0)

# QA chain that retrieves from the index. return_source_documents=True keeps
# the retrieved documents in each result (presumably consumed by the
# evaluation cells further down — confirm).
retrieval_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=index.vectorstore.as_retriever(),
    return_source_documents=True,
)


In [29]:
# Pull both columns out as plain Python lists; row order is preserved, so
# questions[i] pairs with answers[i].
questions, answers = (df[column].tolist() for column in ('Questions', 'Answers'))

In [30]:
# One record per CSV row: the question under "query" and the reference answer
# wrapped in a single-element list under "ground_truths".
examples = [
    {"query": question, "ground_truths": [reference]}
    for question, reference in zip(questions, answers)
]

In [31]:
from ragas.langchain.evalchain import RagasEvaluatorChain
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)

# Wrap each ragas metric in its own evaluator chain.
faithfulness_chain = RagasEvaluatorChain(metric=faithfulness)
answer_rel_chain = RagasEvaluatorChain(metric=answer_relevancy)
# NOTE: despite the "rel" in its name, this chain scores context_precision.
context_rel_chain = RagasEvaluatorChain(metric=context_precision)
context_recall_chain = RagasEvaluatorChain(metric=context_recall)


In [32]:
# Run the QA chain over every example dict in one batch call.
predictions = retrieval_chain.batch(examples)

In [33]:
# Score the predictions against the examples with each metric chain.
faithfulness_score = faithfulness_chain.evaluate(examples, predictions)
answer_relevancy_score = answer_rel_chain.evaluate(examples, predictions)
context_precision_score = context_rel_chain.evaluate(examples, predictions)
# NOTE(review): the recorded output shows "0it" for the last progress bar, and
# context recall is absent from the results table built later — confirm this
# metric actually produced scores.
context_recall_score = context_recall_chain.evaluate(examples, predictions)

100%|██████████| 1/1 [00:15<00:00, 15.45s/it]
100%|██████████| 1/1 [00:17<00:00, 17.70s/it]
100%|██████████| 1/1 [00:04<00:00,  4.48s/it]
0it [00:00, ?it/s]


In [34]:
# Each evaluate() result is iterated as a sequence of dicts; pull the metric's
# value out of every entry so each metric becomes a flat list.
faithfulness_scores = [entry["faithfulness_score"] for entry in faithfulness_score]
answer_relevancy_scores = [entry["answer_relevancy_score"] for entry in answer_relevancy_score]
context_precision_scores = [entry["context_precision_score"] for entry in context_precision_score]
context_recall_scores = [entry["context_recall_score"] for entry in context_recall_score]

In [38]:
# Score table: one row per example, one column per metric
# (context recall is not included).
df_new = pd.DataFrame(
    data={
        "Faithfulness Score": faithfulness_scores,
        "Context Precision Score": context_precision_scores,
        "Answer Relevancy Score": answer_relevancy_scores,
    }
)

In [39]:
# Display the per-example metric scores.
df_new

Unnamed: 0,Faithfulness Score,Context Precision Score,Answer Relevancy Score
0,0.666667,0.416667,0.953996
1,1.0,0.0,0.880023
2,1.0,0.0,0.950465
3,1.0,1.0,0.910492
4,1.0,0.0,0.920332
5,1.0,1.0,1.0
6,1.0,0.0,1.0
7,0.0,0.0,0.0
8,1.0,0.416667,0.969224
9,1.0,0.333333,0.905978


In [40]:
# Tabulate the evaluation examples (columns: query, ground_truths).
df2 = pd.DataFrame(examples)

In [41]:
# Display the examples table.
df2

Unnamed: 0,query,ground_truths
0,Who are the parties to the Agreement and what ...,[Cloud Investments Ltd. (“Company”) and Jack R...
1,What is the termination notice?,[According to section 4:14 days for convenienc...
2,What are the payments to the Advisor under the...,[According to section 6: 1. Fees of $9 per hou...
3,Can the Agreement or any of its obligations b...,[1. Under section 1.1 the Advisor can’t assign...
4,Who owns the IP?,[ According to section 4 of the Undertaking (A...
5,Is there a non-compete obligation to the Advisor?,[Yes. During the term of engagement with the C...
6,Can the Advisor charge for meal time?,"[No. See Section 6.1, Billable Hour doesn’t in..."
7,In which street does the Advisor live?,"[1 Rabin st, Tel Aviv, Israe]"
8,Is the Advisor entitled to social benefits?,"[No. According to section 8 of the Agreement, ..."
9,What happens if the Advisor claims compensatio...,[If the Advisor is determined to be an employe...


In [44]:
# Put each query/ground truth next to its metric scores. The second frame must
# be the scores table (df_new): df only holds the Questions/Answers columns,
# which df2 already carries as query/ground_truths, so concatenating df would
# not produce the score columns this cell is meant to show.
df_concat = pd.concat([df2, df_new], axis=1)

In [45]:
# Display the combined examples-and-scores table.
df_concat

Unnamed: 0,query,ground_truths,Faithfulness Score,Answer Relevancy Score,Context Precision Score
0,Who are the parties to the Agreement and what ...,[Cloud Investments Ltd. (“Company”) and Jack R...,0.666667,0.953996,0.416667
1,What is the termination notice?,[According to section 4:14 days for convenienc...,1.0,0.880023,0.0
2,What are the payments to the Advisor under the...,[According to section 6: 1. Fees of $9 per hou...,1.0,0.950465,0.0
3,Can the Agreement or any of its obligations b...,[1. Under section 1.1 the Advisor can’t assign...,1.0,0.910492,1.0
4,Who owns the IP?,[ According to section 4 of the Undertaking (A...,1.0,0.920332,0.0
5,Is there a non-compete obligation to the Advisor?,[Yes. During the term of engagement with the C...,1.0,1.0,1.0
6,Can the Advisor charge for meal time?,"[No. See Section 6.1, Billable Hour doesn’t in...",1.0,1.0,0.0
7,In which street does the Advisor live?,"[1 Rabin st, Tel Aviv, Israe]",0.0,0.0,0.0
8,Is the Advisor entitled to social benefits?,"[No. According to section 8 of the Agreement, ...",1.0,0.969224,0.416667
9,What happens if the Advisor claims compensatio...,[If the Advisor is determined to be an employe...,1.0,0.905978,0.333333
