In [None]:
!nvidia-smi

## Installing Libraries

To start, let's install LangChain and the supporting libraries (the OpenAI client, tiktoken, Chroma and FAISS) with `pip`:

In [None]:
import os
from getpass import getpass

# Keep the API key out of the notebook source: reuse a key that is already
# exported in the environment, and otherwise ask for it interactively so it is
# never saved with the notebook. The previous version unconditionally
# overwrote the variable with an empty string.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
%%bash

pip install --upgrade pip
# Pinned to the release this notebook was recorded with (see the install log
# below). The notebook relies on 0.0.x import paths such as
# `langchain.document_loaders` and `langchain.llms`.
pip install langchain==0.0.354
pip install tiktoken
# pip install -U sentence-transformers
pip install openai==0.28.1
# NOTE(review): chromadb, faiss-cpu and tiktoken are still unpinned; pin them
# once the working versions are known.
pip install chromadb
pip install faiss-cpu

Collecting pip
  Downloading pip-23.3.2-py3-none-any.whl (2.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 16.1 MB/s eta 0:00:00
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-23.3.2
Collecting langchain
  Downloading langchain-0.0.354-py3-none-any.whl.metadata (13 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.3-py3-none-any.whl.metadata (25 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-community<0.1,>=0.0.8 (from langchain)
  Downloading langchain_community-0.0.8-py3-none-any.whl.metadata (7.3 kB)
Collecting langchain-core<0.2,>=0.1.5 (from langchain)
  Downloading langchain_core-0.1.6-py3-none-any.whl.metadata (4.0 kB)
Collecting langsmith<0.1.0,>=0.0.77 (from langchain

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llmx 0.0.15a0 requires cohere, which is not installed.
llmx 0.0.15a0 requires openai, which is not installed.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llmx 0.0.15a0 requires cohere, which is not installed.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
lida 0.0.10 requires kaleido, which is not installed.
lida 0.0.10 requires python-multipart, which is not installed.
tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.9.0 which is incompatible.


In [None]:
# Here are some imports that we'll need
import logging
import os, shutil

# Configure root logging at WARNING with a "LEVEL - logger - message" format,
# then raise verbosity to DEBUG for the "langchain" logger only.
logging.basicConfig(format="%(levelname)s - %(name)s -  %(message)s", level=logging.WARNING)
logging.getLogger("langchain").setLevel(logging.DEBUG)

In [None]:
!pip show openai

Name: openai
Version: 0.28.1
Summary: Python client library for the OpenAI API
Home-page: https://github.com/openai/openai-python
Author: OpenAI
Author-email: support@openai.com
License: 
Location: /usr/local/lib/python3.10/dist-packages
Requires: aiohttp, requests, tqdm
Required-by: llmx


## Testing

In [None]:
import os, shutil
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.text_splitter import NLTKTextSplitter, CharacterTextSplitter, RecursiveCharacterTextSplitter

# Directory that the optional export step further down writes one file per chunk into.
doc_dir = "data/text_chunks"

# Splitter presets: n is passed as chunk_size and m as chunk_overlap.
# Two presets are kept side by side; the "long" one is currently active.
# NOTE(review): sizes are presumably measured in characters (the recorded
# output shows a 5763-character chunk for n = 6000) — confirm.
# small params
# n = 4500
# m = 2500
# long params
n = 6000
m = 2000

preprocessor = RecursiveCharacterTextSplitter(
    chunk_size=n,
    chunk_overlap=m,
    # separators=[" "],
    keep_separator=False,
    add_start_index=False,
    strip_whitespace=False
)

# NOTE(review): absolute Colab path; the file must be uploaded to /content
# before this cell runs.
file_path = '/content/sample_pdf.txt'
# converting text to langchain document format
doc_loader = TextLoader(file_path)
converted_doc = doc_loader.load()
# splitting document into chunks
docs = preprocessor.split_documents(converted_doc)


## OPTIONAL PART ##
# Write every chunk to its own text file under `doc_dir` for manual inspection.
if os.path.isdir(doc_dir):
    # Start from an empty directory so files from a previous run (which may
    # have produced more chunks) cannot linger. A genuine deletion failure now
    # surfaces here instead of being swallowed by a bare `except:`.
    shutil.rmtree(doc_dir)
    print("creating data folder again..")
else:
    print("creating data folder..")

os.makedirs(doc_dir)

chunks = [k.page_content for k in docs]
for chunk_id, chunk in enumerate(chunks):
    text_file_path = f'{doc_dir}/{chunk_id}.txt'
    # Explicit UTF-8 so the export does not depend on the platform's default
    # encoding (matches the CSV result files written later in the notebook).
    with open(text_file_path, 'w', encoding='utf-8') as f:
        f.write(chunk)

creating data folder again..


In [None]:
len(docs)

28

In [None]:
len(docs[1].page_content), len(docs[1].page_content.split())

(5763, 1001)

In [None]:
####
# initializing document store

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain.indexes import VectorstoreIndexCreator
from langchain.schema.vectorstore import VectorStoreRetriever

# Embedding model used to vectorise every chunk.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment set at
# the top of the notebook — confirm.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)

# Build the vector index over the chunks; Chroma is kept as a commented-out alternative.
# db = Chroma.from_documents(docs, embeddings)
db = FAISS.from_documents(docs, embeddings)

# Similarity retriever. The two variants are labelled for the small/long
# presets; the active one returns the top 2 chunks per query.
# retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":3})    #small
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":2})    #long

In [None]:
small_clauses=['DocumentName', 'EffectiveDate', 'ValidityTerm', 'LimitationOfLiability',
               'Vendor', 'Currency', 'Scope', 'AutoRenewal', 'NonRenewalNoticePeriod', 'Condition',
               'GoverningLaw', 'Update', 'Assignment', 'DelayedPayment', 'ThirdParty',
               'PublicAnnouncement', 'LiquidityDamages', 'ForceMajeure', 'TerminationNoticePeriod', 'Contract Value']
long_clauses = ['IPR', 'TerminationClause', 'LicenseGrant', 'WarrantyClause', 'Support',
                'ObligationRakuten', 'ObligationVendor', 'PaymentTerms', 'Exclusions','Indmenity']

In [None]:
# retriever.get_relevant_documents(clause_question["IPR"])

### Zero-Shot Inference

In [None]:
from langchain.llms import OpenAI, OpenAIChat
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# New Questions
clause_question = {
    "DocumentName": "What is the name/title of the agreement?",
    "EffectiveDate" : "What is the effective date of the agreement? or When does the agreement become operative?",
    "ValidityTerm": "Extract the initial validity period of the agreement (Validity Term). Convert the answer into months.",
    "GoverningLaw": "Extract the clause related to the law that governs the interpretation of the contract? (Governing Law)",
    "LiquidityDamages": "Extract the clause that would award either party liquidated damages for breach or a fee upon the termination of a contract (termination fee)?",
    "IPR": "Extract the complete clause(s) related to Intellectual Property Rights/Ownership from the contract.",
    "TerminationClause": "Extract the complete Termination/Cancellation clause(s) from the contract under which the agreement can be terminated by one or the other parties.",
    "AutoRenewal": "What will be the auto renewal term after the initial term expires? Convert the answer in months.",
    "TerminationNoticePeriod": "What is the waiting/notice period if a party can terminate this contract with or without cause (solely by giving a notice and allowing a waiting period to expire)? (Termination Notice Period)",
    "NonRenewalNoticePeriod": "What is the prior notice required to prevent the renewal of the contract (Non-Renewable Notice Period)? Convert the answer in days.",
    "LicenseGrant": "Extract the complete License Grant clause which contains all the information related to license granted by one party to its counterparty from the contract?",
    "WarrantyClause": "Extract the complete Warranty clause which contains information about warranty against defects or errors in technology, products, or services provided under the contract?",
    "ThirdParty": "Extract the complete clause related to a non-contracting party who is a beneficiary to some or all of the clauses in the contract and therefore can enforce its rights against a contracting party. (Third Party Beneficiary/Sub-Contractor/Re-consignment)",
    "LimitationOfLiability": "Extract the complete clause related to a party's liability upon the breach of its obligation in the contract? (Limitation Of Liability)",
    "Support": "Extract the clause related one more party offering Support/Maintenance services or goods to another party mentioned in the contract. (if any)",
    "ObligationRakuten": "Extract the clause(s) related to the obligations/duties/responsibilities of Rakuten from the contract.",
    "ObligationVendor": "Extract the clause(s) related to the obligations/duties/responsibilities of Vendor(party/parties other than Rakuten) from the contract.",
    "Assignment": "Extract the clause related to the provision regarding assignment mentioned in the contract?",
    "PublicAnnouncement": "Extract the full clause related to publicity or public announcement in the contract.(if any)",
    "ForceMajeure": "Extract the full clause related to Force Majeure in the contract. (Force Majeure)",
    "Vendor": "Extract the name(s) of one or more parties which signed the contract as vendor (not as the Assigner).",
    "PaymentTerms": "Extract the complete Payment Terms clause which contains information related to Payment/Invoice/Fee from the contract?",
    "Indmenity": "Extract the complete indemnification clause which contains contractual obligation of one party to indemnify the other party.",
    "Currency": "Which country's currency will be used for payment? Answer in 3-letter currency code. Example: USD,JPY,EUR,INR",
    "Update": "Find the clause about updation of services. (Update/Upgrade)",
    "DelayedPayment": "Extract the information related to the consequences of delay in payment (Delay/Delinquency Charges)?",
    "Exclusions": "Extract complete clause related to Exclusions/Anti-Social which seek to prevent the misuse of services/software.",
    "Scope": "Extract the complete Scope/Purpose/Entire Agreement clause from this contract?",
    "SLA": "Extract complete SLA clause.",
    "Condition": "Extract the complete clause related to the terms of the agreement (which includes validity term, auto-renewal term and non-renewable notice period) present in the contract with conditions.",
    "Contract Value": "What is the total amount of contract?"
}

In [None]:
from langchain.prompts import PromptTemplate

# Build prompt
prompt_template = """Follow the following set of instructions that describes a task:
1. Use the following pieces of context to extract an answer to the question at the end.
2. Do not add any explaination/justification for answers and do not attempt to asnwer more than than what is asked.
3. If you don't know the answer or if the answer is not present in the given context, just say "NA", don't try to make up an answer.

Context: {context}

Question: {question}
Answer:"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [None]:
print ("------- Prompt Begin -------")

final_prompt = PROMPT.format(context='CONTEXT', question=clause_question["TerminationClause"])
print(final_prompt)

print ("------- Prompt End -------")

------- Prompt Begin -------
Follow the following set of instructions that describes a task:
1. Use the following pieces of context to extract an answer to the question at the end.
2. Do not add any explaination/justification for answers and do not attempt to asnwer more than than what is asked.
3. If you don't know the answer or if the answer is not present in the given context, just say "NA", don't try to make up an answer.

Context: CONTEXT

Question: Extract the complete Termination/Cancellation clause(s) from the contract under which the agreement can be terminated by one or the other parties.
Answer:
------- Prompt End -------


In [None]:
# llm = ChatOpenAI(model_name="gpt-4", temperature=0.1, max_tokens=None)#, best_of=3)
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.1, max_tokens=-1)
# llm = OpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.2, max_tokens=512)
# llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.2, max_tokens=1280, best_of=1)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={"prompt": PROMPT, "verbose": False})

query = clause_question["TerminationClause"]
result = qa({"query": query})
result

In [None]:
import csv
import time

# Run the zero-shot chain for every "long" clause and store the answers in a CSV.
fields = ['Entity/Clause', 'Query', 'Results']
# newline='' is required by the csv module: without it, rows are written with
# extra blank lines on Windows and newlines embedded in the extracted clause
# text are not handled correctly.
with open('results_ss_long.csv', mode='w', encoding='utf-8', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(fields)
    last_index = len(long_clauses) - 1
    for clause_index, k in enumerate(long_clauses):
        query = clause_question[k]
        result = qa({"query": query})
        # Leading whitespace is stripped from the stored answer only; the
        # printed answer is shown as returned.
        csvwriter.writerow([k, query, result['result'].lstrip()])
        print(result['result'])
        # Pause between requests (presumably to stay under the API rate
        # limit); no need to wait after the final clause.
        if clause_index < last_index:
            time.sleep(60)

### Few-Shot Inference

In [None]:
all_examples = {
    'DocumentName':
     [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['DocumentName']}",
        "answer": "Statement of Work"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['DocumentName']}",
        "answer": "MASTER SOFTWARE LICENSE AND SUPPORT AGREEMENT"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['DocumentName']}",
        "answer": "Confidentiality Agreement"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['DocumentName']}",
        "answer": "Amendment to the interconnection agreement"
        },
    ],
    'EffectiveDate':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "June 30, 2021"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "24/9/2019"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "December 24, 2021"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "July 1st, 2020"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "NA"
        },
    ],
    'ValidityTerm':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['ValidityTerm']}",
        "answer": "one year"
        },
         {
        "context": """CONTEXT""",
        "question": f"{clause_question['ValidityTerm']}",
        "answer": "five (5) years"
        },
         {
        "context": """CONTEXT""",
        "question": f"{clause_question['ValidityTerm']}",
        "answer": "NA"
        },
    ],
    'Vendor':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['Vendor']}",
        "answer": "NEC Corporation"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['Vendor']}",
        "answer": "Airspan Networks Inc."
        },
    ],
    'Currency':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['Currency']}",
        "answer": "Japanese Yen"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['Currency']}",
        "answer": "US dollars"
        },
        {
        "context": """“CONTEXT""",
        "question": f"{clause_question['Currency']}",
        "answer": "USD"
        },
    ],
    'AutoRenewal':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['AutoRenewal']}",
        "answer": "two (2) year"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['AutoRenewal']}",
        "answer": "an additional year"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['AutoRenewal']}",
        "answer": "NA"
        },
    ],
    'NonRenewalNoticePeriod':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['NonRenewalNoticePeriod']}",
        "answer": "9 months"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['NonRenewalNoticePeriod']}",
        "answer": "forty-five (45) days"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['NonRenewalNoticePeriod']}",
        "answer": "NA"
        },
    ],
    'TerminationNoticePeriod':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['TerminationNoticePeriod']}",
        "answer": "thirty (30) days"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['TerminationNoticePeriod']}",
        "answer": "60 days"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['TerminationNoticePeriod']}",
        "answer": "three months"
        },
    ]

}

In [None]:
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate

# Create example prompts
examples = all_examples['AutoRenewal']


example_template = """
Context: {context}
Question: {question}
Answer: {answer}
"""

example_prompt = PromptTemplate(
    input_variables=["context","question", "answer"],
    template=example_template
)

In [None]:
# print(example_prompt.format(context='C',question='q',answer='a'))

In [None]:
# Create a prefix and suffix for the prompt
prefix = """Follow the following set of instructions that describes a task:
1. Use the following pieces of context to extract an answer to the question that follows.
2. Do not add any explaination/justification for answers and do not attempt to asnwer more than than what is asked.
3. If you don't know the answer or if the answer is not present in the given context, just say "NA", don't try to make up an answer.

Here are some examples:
"""

suffix = """\n\nNow, use the context given below to extract an answer to the question that follows.\n
Context: {context}
Question: {question}
Answer: """

# Create the FewShotPromptTemplate
few_shot_prompt_template = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["context", "question"],
    example_separator=""
)

In [None]:
print ("------- Prompt Begin -------")

final_prompt = few_shot_prompt_template.format(context='CONTEXT', question=clause_question["AutoRenewal"])
print(final_prompt)

print ("------- Prompt End -------")

In [None]:
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.1, max_tokens=512)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={"prompt": few_shot_prompt_template, "verbose": False},
)

query = clause_question["AutoRenewal"]
result = qa({"query": query})

INFO:langchain.utils.math:Unable to import simsimd, defaulting to NumPy implementation. If you want to use simsimd please install with `pip install simsimd`.
INFO:langchain.utils.math:Unable to import simsimd, defaulting to NumPy implementation. If you want to use simsimd please install with `pip install simsimd`.
INFO:langchain.utils.math:Unable to import simsimd, defaulting to NumPy implementation. If you want to use simsimd please install with `pip install simsimd`.


In [None]:
result

{'query': 'What is the auto renewal term after the initial term expires? Convert the answer in months.',
 'result': '36 months'}

### Few-Shot Inference (with example number)

In [None]:
from langchain.llms import OpenAI, OpenAIChat
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# New Questions
clause_question = {
    "DocumentName": "What is the name/title of the agreement?",
    "EffectiveDate" : "What is the effective date of the agreement? or When does the agreement become operative?",
    "ValidityTerm": "Extract the initial validity period of the agreement (Validity Term). Convert the answer into months.",
    "GoverningLaw": "Extract the clause related to the law that governs the interpretation of the contract? (Governing Law)",
    "LiquidityDamages": "Extract the clause that would award either party liquidated damages for breach or a fee upon the termination of a contract (termination fee)?",
    "IPR": "Extract the complete clause(s) related to Intellectual Property Rights/Ownership from the contract.",
    "TerminationClause": "Extract the complete Termination/Cancellation clause(s) from the contract under which the agreement can be terminated by one or the other parties.",
    "AutoRenewal": "What will be the auto renewal term after the initial term expires? Convert the answer in months.",
    "TerminationNoticePeriod": "What is the waiting/notice period if a party can terminate this contract with or without cause (solely by giving a notice and allowing a waiting period to expire)? (Termination Notice Period)",
    "NonRenewalNoticePeriod": "What is the prior notice required to prevent the renewal of the contract (Non-Renewable Notice Period)? Convert the answer in days.",
    "LicenseGrant": "Extract the complete License Grant clause which contains all the information related to license granted by one party to its counterparty from the contract?",
    "WarrantyClause": "Extract the complete Warranty clause which contains information about warranty against defects or errors in technology, products, or services provided under the contract?",
    "ThirdParty": "Extract the complete clause related to a non-contracting party who is a beneficiary to some or all of the clauses in the contract and therefore can enforce its rights against a contracting party. (Third Party Beneficiary/Sub-Contractor/Re-consignment)",
    "LimitationOfLiability": "Extract the complete clause related to a party's liability upon the breach of its obligation in the contract? (Limitation Of Liability)",
    "Support": "Extract the clause related one more party offering Support/Maintenance services or goods to another party mentioned in the contract. (if any)",
    "ObligationRakuten": "Extract the clause(s) related to the obligations/duties/responsibilities of Rakuten from the contract.",
    "ObligationVendor": "Extract the clause(s) related to the obligations/duties/responsibilities of Vendor(party/parties other than Rakuten) from the contract.",
    "Assignment": "Extract the clause related to the provision regarding assignment mentioned in the contract?",
    "PublicAnnouncement": "Extract the full clause related to publicity or public announcement in the contract.(if any)",
    "ForceMajeure": "Extract the full clause related to Force Majeure in the contract. (Force Majeure)",
    "Vendor": "Extract the name(s) of one or more parties which signed the contract as vendor (not as the Assigner).",
    "PaymentTerms": "Extract the complete Payment Terms clause which contains information related to Payment/Invoice/Fee from the contract?",
    "Indmenity": "Extract the complete indemnification clause which contains contractual obligation of one party to indemnify the other party.",
    "Currency": "Which country's currency will be used for payment? Answer in 3-letter currency code. Example: USD,JPY,EUR,INR",
    "Update": "Find the clause about updation of services. (Update/Upgrade)",
    "DelayedPayment": "Extract the information related to the consequences of delay in payment (Delay/Delinquency Charges)?",
    "Exclusions": "Extract complete clause related to Exclusions/Anti-Social which seek to prevent the misuse of services/software.",
    "Scope": "Extract the complete Scope/Purpose/Entire Agreement clause from this contract?",
    "SLA": "Extract complete SLA clause.",
    "Condition": "Extract the complete clause related to the terms of the agreement (which includes validity term, auto-renewal term and non-renewable notice period) present in the contract with conditions.",
    "Contract Value": "What is the total amount of contract?"
}

In [None]:
all_examples = {
    'DocumentName':
     [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['DocumentName']}",
        "answer": "Statement of Work"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['DocumentName']}",
        "answer": "MASTER SOFTWARE LICENSE AND SUPPORT AGREEMENT"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['DocumentName']}",
        "answer": "Confidentiality Agreement"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['DocumentName']}",
        "answer": "Amendment to the interconnection agreement"
        },
    ],
    'EffectiveDate':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "June 30, 2021"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "24/9/2019"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "December 24, 2021"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "July 1st, 2020"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['EffectiveDate']}",
        "answer": "NA"
        },
    ],
    'ValidityTerm':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['ValidityTerm']}",
        "answer": "one year"
        },
         {
        "context": """CONTEXT""",
        "question": f"{clause_question['ValidityTerm']}",
        "answer": "five (5) years"
        },
         {
        "context": """CONTEXT""",
        "question": f"{clause_question['ValidityTerm']}",
        "answer": "NA"
        },
    ],
    'Vendor':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['Vendor']}",
        "answer": "NEC Corporation"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['Vendor']}",
        "answer": "Airspan Networks Inc."
        },
    ],
    'Currency':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['Currency']}",
        "answer": "Japanese Yen"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['Currency']}",
        "answer": "US dollars"
        },
        {
        "context": """“CONTEXT""",
        "question": f"{clause_question['Currency']}",
        "answer": "USD"
        },
    ],
    'AutoRenewal':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['AutoRenewal']}",
        "answer": "two (2) year"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['AutoRenewal']}",
        "answer": "an additional year"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['AutoRenewal']}",
        "answer": "NA"
        },
    ],
    'NonRenewalNoticePeriod':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['NonRenewalNoticePeriod']}",
        "answer": "9 months"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['NonRenewalNoticePeriod']}",
        "answer": "forty-five (45) days"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['NonRenewalNoticePeriod']}",
        "answer": "NA"
        },
    ],
    'TerminationNoticePeriod':
    [
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['TerminationNoticePeriod']}",
        "answer": "thirty (30) days"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['TerminationNoticePeriod']}",
        "answer": "60 days"
        },
        {
        "context": """CONTEXT""",
        "question": f"{clause_question['TerminationNoticePeriod']}",
        "answer": "three months"
        },
    ]

}

In [None]:
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate


# Create example prompts

def create_examples(clause = 'EffectiveDate', examples = None):
    """Render the few-shot examples for one clause as a numbered text block.

    Args:
        clause: Key into the module-level ``all_examples`` dict. Only used
            when ``examples`` is not supplied.
        examples: Optional list of dicts with ``"context"``, ``"question"``
            and ``"answer"`` keys. Defaults to ``all_examples[clause]``.

    Returns:
        A string of the form ``"1. Context: ...\\nQuestion: ...\\nAnswer: ...\\n"``
        per example, with the examples separated by a blank line. An empty
        example list yields an empty string.

    Raises:
        KeyError: if ``examples`` is omitted and ``clause`` has no entry in
            ``all_examples``, or if an example lacks one of the three keys.
    """
    if examples is None:
        examples = all_examples[clause]

    # Written as a single-line literal on purpose: the earlier indented
    # triple-quoted template leaked four spaces of source indentation in front
    # of every "Question:" / "Answer:" line of the prompt.
    example_template = "Context: {context}\nQuestion: {question}\nAnswer: {answer}\n"

    # NOTE: the result is concatenated into a PromptTemplate by the callers in
    # this notebook, so literal "{" / "}" inside an example would be read as
    # template variables there.
    return "\n".join(
        f"{number}. " + example_template.format(
            context=example['context'],
            question=example['question'],
            answer=example['answer'],
        )
        for number, example in enumerate(examples, start=1)
    )

In [None]:
# print(example_prompt.format(context='C',question='q',answer='a'))

In [None]:
# Build prompt
prefix = """Follow the following set of instructions that describes a task:
1. Use the following pieces of context to extract an answer to the question at the end.
2. Do not add any explaination/justification for answers and do not attempt to asnwer more than than what is asked.
3. If you don't know the answer or if the answer is not present in the given context, just say "NA", don't try to make up an answer.

Here are some examples:

"""

suffix="""\n\nNow, use the context given below to extract an answer to the question that follows.\n
Context: {context}

Question: {question}
Answer:"""

prompt_template = prefix + create_examples('EffectiveDate') + suffix

PROMPT_FEW_SHOT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [None]:
print ("------- Prompt Begin -------")

final_prompt = PROMPT_FEW_SHOT.format(context='CONTEXT', question=clause_question["EffectiveDate"])
print(final_prompt)

print ("------- Prompt End -------")

In [None]:
# llm = OpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.1, max_tokens=512)
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.1, max_tokens=256)#, best_of=3)
# llm = ChatOpenAI(model_name="gpt-4", temperature=0.1, max_tokens=512)#, best_of=3)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={"prompt": PROMPT_FEW_SHOT, "verbose": False},
)

query = clause_question["EffectiveDate"]
result = qa({"query": query})

In [None]:
result

{'query': 'What is the effective date of the agreement? or When does the agreement become operative?',
 'result': ' NA'}

In [None]:
# Build prompt
prompt_template_zero_shot = """Follow the following set of instructions that describes a task:
1. Use the following pieces of context to extract an answer to the question at the end.
2. Do not add any explaination/justification for answers and do not attempt to asnwer more than than what is asked.
3. If you don't know the answer or if the answer is not present in the given context, just say "NA", don't try to make up an answer.

Context: {context}

Question: {question}
Helpful Answer:"""
PROMPT_ZERO_SHOT = PromptTemplate(
    template=prompt_template_zero_shot, input_variables=["context", "question"]
)

# Create a prefix and suffix for the prompt
prefix = """Follow the following set of instructions that describes a task:
1. Use the following pieces of context to extract an answer to the question at the end.
2. Do not add any explaination/justification for answers and do not attempt to asnwer more than than what is asked.
3. If you don't know the answer or if the answer is not present in the given context, just say "NA", don't try to make up an answer.

Here are some examples:

"""

suffix="""\n\nNow, use the context given below to extract an answer to the question that follows.\n
Context: {context}

Question: {question}
Answer:"""

import csv
import time

fields = ['Entity/Clause', 'Query', 'Results', 'Few-Shot']
llm1 = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.1, max_tokens=256)
llm2 = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.1, max_tokens=512)
# llm = ChatOpenAI(model_name="gpt-4", temperature=0.1, max_tokens=512)
# llm = OpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.1, max_tokens=512)
with open('results_ss.csv', mode='w', encoding='utf-8') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(fields)
    for k in small_clauses:
        if k in ['DocumentName','EffectiveDate', 'ValidityTerm', 'Vendor', 'Currency', 'AutoRenewal', 'NonRenewalNoticePeriod', 'TerminationNoticePeriod']:
            # Create the FewShotPromptTemplate
            prompt_template_few_shot = prefix + create_examples(k) + suffix
            PROMPT_FEW_SHOT = PromptTemplate(
                template=prompt_template_few_shot, input_variables=["context", "question"]
            )
            qa = RetrievalQA.from_chain_type(
                llm=llm1,
                chain_type="stuff",
                retriever=retriever,
                return_source_documents=False,
                chain_type_kwargs={"prompt": PROMPT_FEW_SHOT, "verbose": False},
            )
            query = clause_question[k]
            try:
                result = qa({"query": query})['result']
            except Exception as e:
                result = str(e)
            csvwriter.writerows([[k,query,result.lstrip(),'Yes']])
        else:
            qa = RetrievalQA.from_chain_type(
                llm=llm2,
                chain_type="stuff",
                retriever=retriever,
                return_source_documents=False,
                chain_type_kwargs={"prompt": PROMPT_ZERO_SHOT, "verbose": False})

            query = clause_question[k]
            try:
                result = qa({"query": query})['result']
            except Exception as e:
                result = str(e)
            csvwriter.writerows([[k,query,result.lstrip(),'No']])
        print(result)
        time.sleep(5)

In [None]:
import glob
for file_path in glob.glob('/content/drive/MyDrive/CLM/eval_docs/*txt'):
    print(file_path)

### Ensemble

In [None]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.vectorstores import FAISS


# initialize the bm25 retriever and faiss retriever
# The BM25 retriever is built over the same `docs` chunks; its `k` is set to 1.
bm25_retriever = BM25Retriever.from_documents(docs)
bm25_retriever.k = 1

# NOTE(review): this builds a second FAISS index over the same `docs` and
# rebinds `embeddings`, both already created in the document-store cell.
# Reusing the earlier `db` would avoid embedding the chunks again — confirm
# whether the rebuild is intentional.
# `OpenAIEmbeddings` is not imported in this cell; it relies on the import in
# the earlier document-store cell.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)
faiss_vectorstore = FAISS.from_documents(docs, embeddings)
# Dense retriever configured with search_type="mmr" and k=3.
faiss_retriever = faiss_vectorstore.as_retriever(search_type="mmr", search_kwargs={"k":3})

# initialize the ensemble retriever
# Weights are listed in the same order as `retrievers`: 0.2 for BM25, 0.8 for FAISS.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever], weights=[0.2, 0.8]
)

In [None]:
ensemble_retriever.get_relevant_documents(clause_question["EffectiveDate"], )