In [10]:
import sys
import os
import json
import re

src_directory = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(src_directory)

from llm_handler import *

In [13]:
rag_knowledge_base_path = "../../data/cdm_samples_json"

llm_handler = LLMHandler(
    model_id="meta-llama/Llama-3.2-3B-Instruct",
    token="huggingface access token"
)

# Initialize RAG within the LLMHandler
llm_handler.initialize_rag(
    documents_path=rag_knowledge_base_path,
    embed_model_name="BAAI/bge-small-en-v1.5",
    top_k=5,
    similarity_cutoff=0.6
)

Downloading shards: 100%|██████████| 2/2 [02:39<00:00, 79.71s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:20<00:00, 10.10s/it]


Initialized LLM with model: meta-llama/Llama-3.2-3B-Instruct
LLM is explicitly disabled. Using MockLLM.
Initialized RAG with 856 documents and embedding model: BAAI/bge-small-en-v1.5


In [14]:

contract_descriptions_path = 'test_data'

contracts = []

for filename in os.listdir(contract_descriptions_path):
    if filename.endswith('.txt'):
        file_path = os.path.join(contract_descriptions_path, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            contracts.append({'name': os.path.splitext(filename)[0], 'description': file.read().strip()})

In [15]:
def extract_json(string):
    print("Curr Json String ---> ")
    print(string)
    match = re.search(r'(\{.*\})', string, re.DOTALL)
    if match:
        json_string = match.group(1)
        
        try:
            json_obj = json.loads(json_string)
            return json_obj
        except Exception:
            return None
    else:
        return None

def is_json(string):
    try:
        json.loads(string)
        return True
    except Exception:
        return False

In [18]:
cdms = []

for contract in contracts:
    
    while(True):
        contract_summary = llm_handler.generate("Summarize the following contract in AT MOST 500 words : " + contract["description"])
        if len(contract_summary) < 5000:
            print("Contract Summary for RAG context retrieval is generated!")
            break

    # RAG context retrieval
    rag_context = llm_handler.get_context_using_rag(f"Find CDM representations that best capture the information from a derivatives contract like the following:\n\n{contract_summary}")

    # Prepare prompts
    basic_prompt = f"""
    Represent the following derivatives contract in a complete ISDA CDM JSON format, adhering strictly to the CDM schema. Ensure all key details are covered and follow the schema definitions exactly:

    Derivatives Contract:
    {contract['description']}

    Please ensure:
    1. The output is **concise** but **complete**, following the ISDA CDM dictionary.
    2. **Only** provide the JSON representation, with no additional text or explanations.
    3. If the content exceeds the token limit, **truncate** appropriately while still providing a **valid** and complete JSON structure.
    
    Output the **final JSON** below.
    """

    rag_prompt = f"""
    Represent the following derivatives contract in a complete ISDA CDM JSON format, adhering strictly to the CDM schema. Ensure all key details are covered and follow the schema definitions exactly:

    Derivatives Contract:
    {contract['description']}

    Relevant CDM Examples for Reference:
    {rag_context}

    Please ensure:
    1. The output is **concise** but **complete**, following the ISDA CDM dictionary.
    2. **Only** provide the JSON representation, with no additional text or explanations.
    3. If the content exceeds the token limit, **truncate** appropriately while still providing a **valid** and complete JSON structure.
    
    Output the **final JSON** below.
    """


    # Saving the responses to JSON files if they don't already exist
    cdm_file_path = f"test_data/generated_cdms/Llama-3.2-3B-Instruct/{contract['name']}_cdm.json"
    cdm_rag_file_path = f"test_data/generated_cdms/Llama-3.2-3B-Instruct/{contract['name']}_cdm_rag.json"

    if not os.path.exists(cdm_file_path):
        conf = ""
        while(True):
            cdm_response = extract_json(llm_handler.generate(basic_prompt+conf))
            if cdm_response:
                print("Generated a correct JSON!!")
                break
            else:
                print(cdm_response, "\n")
                conf = "\n\nNote that your previous response didn't generate a COMLETE JSON probably because of the token limit. Make sure to generate a COMPLETE JSON this time even if you need to ignore some less important data."
                print("Generated CDM is not a correct JSON. Generating again...")

        with open(cdm_file_path, 'w') as file:
            json.dump(cdm_response, file, indent=4)
        print(f"Saved CDM response to {cdm_file_path}")

    if not os.path.exists(cdm_rag_file_path):
        conf = ""
        while(True):
            cdm_with_rag_response = extract_json(llm_handler.generate(rag_prompt+conf))
            if cdm_with_rag_response:
                print("(RAG) Generated a correct JSON!!")
                break
            else:
                print(cdm_with_rag_response, "\n")
                conf = "\n\nNote that your previous response didn't generate a COMLETE JSON probably because of the token limit. Make sure to generate a COMPLETE JSON this time even if you need to ignore some less important data."
                print("(RAG) Generated CDM is not a correct JSON. Generating again...")

        with open(cdm_rag_file_path, 'w') as file:
            json.dump(cdm_with_rag_response, file, indent=4)
        print(f"Saved CDM_RAG response to {cdm_rag_file_path}")


    # Print or process the responses
    print("CDM Response without RAG:")
    print(cdm_response)
    print("\nCDM Response with RAG:")
    print(cdm_with_rag_response)

    cdms.append(
        {
            "name": contract['name'],
            "description": contract['description'],
            "cdm": cdm_response,
            "cdm_rag": cdm_with_rag_response
        }
    )

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Contract Summary for RAG context retrieval is generated!


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


CDM Response without RAG:
None

CDM Response with RAG:
{'DerivativesContract': {'TradeDate': '2018-04-19', 'TradeTime': 'Available on request', 'EffectiveDate': '2018-04-23', 'TerminationDate': 'The final Cash Settlement Payment Date', 'EquityAmounts': {'LongPortfolio': 'As specified in Annex 1', 'ShortPortfolio': 'As specified in Annex 1', 'Number of Units': 'As specified in Annex 1'}, 'FloatingAmounts': {'FloatingAmountPayer': 'Party B', 'FloatingAmountPayment': {'paymentDateSchedule': {'interimPaymentDates': [{'adjustableDates': {'unadjustedDate': '2018-05-02', 'dateAdjustments': {'businessDayConvention': 'FOLLOWING', 'businessCenters': {'businessCenter': [{'value': 'EUTA'}, {'value': 'GBLO'}], 'meta': {'globalKey': '4158421'}}, 'meta': {'globalKey': 'da3b6050'}}}, 'meta': {'globalKey': 'da15ebc2', 'externalKey': 'trs-eq1-InterimEquityPaymentDate'}}], 'finalPaymentDate': {'relativeDate': {'periodMultiplier': 2, 'period': 'D', 'meta': {'globalKey': 'fc30271'}, 'dayType': 'CurrencyBus

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


CDM Response without RAG:
None

CDM Response with RAG:
{'DerivativesContract': {'TradeDate': '2018-04-19', 'TradeTime': 'Available on request', 'EffectiveDate': '2018-04-23', 'TerminationDate': 'The final Cash Settlement Payment Date', 'EquityAmounts': {'LongPortfolio': 'As specified in Annex 1', 'ShortPortfolio': 'As specified in Annex 1', 'Number of Units': 'As specified in Annex 1'}, 'FloatingAmounts': {'FloatingAmountPayer': 'Party B', 'FloatingAmountPayment': {'paymentDateSchedule': {'interimPaymentDates': [{'adjustableDates': {'unadjustedDate': '2018-05-02', 'dateAdjustments': {'businessDayConvention': 'FOLLOWING', 'businessCenters': {'businessCenter': [{'value': 'EUTA'}, {'value': 'GBLO'}], 'meta': {'globalKey': '4158421'}}, 'meta': {'globalKey': 'da3b6050'}}}, 'meta': {'globalKey': 'da15ebc2', 'externalKey': 'trs-eq1-InterimEquityPaymentDate'}}], 'finalPaymentDate': {'relativeDate': {'periodMultiplier': 2, 'period': 'D', 'meta': {'globalKey': 'fc30271'}, 'dayType': 'CurrencyBus

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


CDM Response without RAG:
None

CDM Response with RAG:
{'DerivativesContract': {'TradeDate': '2018-04-19', 'TradeTime': 'Available on request', 'EffectiveDate': '2018-04-23', 'TerminationDate': 'The final Cash Settlement Payment Date', 'EquityAmounts': {'LongPortfolio': 'As specified in Annex 1', 'ShortPortfolio': 'As specified in Annex 1', 'Number of Units': 'As specified in Annex 1'}, 'FloatingAmounts': {'FloatingAmountPayer': 'Party B', 'FloatingAmountPayment': {'paymentDateSchedule': {'interimPaymentDates': [{'adjustableDates': {'unadjustedDate': '2018-05-02', 'dateAdjustments': {'businessDayConvention': 'FOLLOWING', 'businessCenters': {'businessCenter': [{'value': 'EUTA'}, {'value': 'GBLO'}], 'meta': {'globalKey': '4158421'}}, 'meta': {'globalKey': 'da3b6050'}}}, 'meta': {'globalKey': 'da15ebc2', 'externalKey': 'trs-eq1-InterimEquityPaymentDate'}}], 'finalPaymentDate': {'relativeDate': {'periodMultiplier': 2, 'period': 'D', 'meta': {'globalKey': 'fc30271'}, 'dayType': 'CurrencyBus

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


CDM Response without RAG:
None

CDM Response with RAG:
{'DerivativesContract': {'TradeDate': '2018-04-19', 'TradeTime': 'Available on request', 'EffectiveDate': '2018-04-23', 'TerminationDate': 'The final Cash Settlement Payment Date', 'EquityAmounts': {'LongPortfolio': 'As specified in Annex 1', 'ShortPortfolio': 'As specified in Annex 1', 'Number of Units': 'As specified in Annex 1'}, 'FloatingAmounts': {'FloatingAmountPayer': 'Party B', 'FloatingAmountPayment': {'paymentDateSchedule': {'interimPaymentDates': [{'adjustableDates': {'unadjustedDate': '2018-05-02', 'dateAdjustments': {'businessDayConvention': 'FOLLOWING', 'businessCenters': {'businessCenter': [{'value': 'EUTA'}, {'value': 'GBLO'}], 'meta': {'globalKey': '4158421'}}, 'meta': {'globalKey': 'da3b6050'}}}, 'meta': {'globalKey': 'da15ebc2', 'externalKey': 'trs-eq1-InterimEquityPaymentDate'}}], 'finalPaymentDate': {'relativeDate': {'periodMultiplier': 2, 'period': 'D', 'meta': {'globalKey': 'fc30271'}, 'dayType': 'CurrencyBus

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


CDM Response without RAG:
None

CDM Response with RAG:
{'DerivativesContract': {'TradeDate': '2018-04-19', 'TradeTime': 'Available on request', 'EffectiveDate': '2018-04-23', 'TerminationDate': 'The final Cash Settlement Payment Date', 'EquityAmounts': {'LongPortfolio': 'As specified in Annex 1', 'ShortPortfolio': 'As specified in Annex 1', 'Number of Units': 'As specified in Annex 1'}, 'FloatingAmounts': {'FloatingAmountPayer': 'Party B', 'FloatingAmountPayment': {'paymentDateSchedule': {'interimPaymentDates': [{'adjustableDates': {'unadjustedDate': '2018-05-02', 'dateAdjustments': {'businessDayConvention': 'FOLLOWING', 'businessCenters': {'businessCenter': [{'value': 'EUTA'}, {'value': 'GBLO'}], 'meta': {'globalKey': '4158421'}}, 'meta': {'globalKey': 'da3b6050'}}}, 'meta': {'globalKey': 'da15ebc2', 'externalKey': 'trs-eq1-InterimEquityPaymentDate'}}], 'finalPaymentDate': {'relativeDate': {'periodMultiplier': 2, 'period': 'D', 'meta': {'globalKey': 'fc30271'}, 'dayType': 'CurrencyBus

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Curr Json String ---> 
```json
{
  "Agreement": {
    "AgreementType": "ISDA Master Agreement",
    "AgreementDate": "19930506",
    "AmendmentDate": null,
    "AmendmentNumber": null,
    "AmendmentText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": null,
    "AmendmentText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": null,
    "AmendmentText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": null,
    "AmendmentText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": null,
    "AmendmentText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": null,
    "AmendmentText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": null,
    "AmendmentText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": null,
    "AmendmentText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": null,
    "AmendmentText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": nu

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Curr Json String ---> 
```json
{
  "Agreement": {
    "AgreementType": "ISDA Master Agreement",
    "AgreementDate": "19930506",
    "AmendmentDate": null,
    "AmendmentNumber": null,
    "AmendmentText": null,
    "AmendmentNumberText": null,
    "AmendmentDateText": null,
    "AmendmentNumberText": null,
    "AmendmentText": null,
    "AmendmentNumber": null,
    "AmendmentDate": null,
    "AmendmentNumber": null,
    "AmendmentText": null,
    "AmendmentNumber": null,
    "AmendmentDate": null,
    "AmendmentNumber": null,
    "AmendmentText": null,
    "AmendmentNumber": null,
    "AmendmentDate": null,
    "AmendmentNumber": null,
    "AmendmentText": null,
    "AmendmentNumber": null,
    "AmendmentDate": null,
    "AmendmentNumber": null,
    "AmendmentText": null,
    "AmendmentNumber": null,
    "AmendmentDate": null,
    "AmendmentNumber": null,
    "AmendmentText": null,
    "AmendmentNumber": null,
    "AmendmentDate": null,
    "AmendmentNumber": null,
    "AmendmentText"

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Curr Json String ---> 
```json
{
  "Agreement": {
    "AgreementType": "ISDA Master Agreement",
    "AgreementDate": "19930506",
    "AmendmentDate": "19930506",
    "AmendmentNumber": "1",
    "AmendmentText": "ISDA Master Agreement dated 6th May 1993, as amended and supplemented from time to time, between the parties.",
    "AmendmentType": "ISDA Master Agreement",
    "AmendmentVersion": "ISDA Master Agreement",
    "AmendmentVersionDate": "19930506",
    "AmendmentVersionText": "ISDA Master Agreement dated 6th May 1993, as amended and supplemented from time to time, between the parties.",
    "AmendmentVersionNumber": "1",
    "AmendmentVersionNumberText": "1",
    "AmendmentVersionDateText": "6th May 1993",
    "AmendmentVersionNumberText": "1",
    "AmendmentVersionText": "ISDA Master Agreement dated 6th May 1993, as amended and supplemented from time to time, between the parties.",
    "AmendmentVersion": "ISDA Master Agreement",
    "AmendmentVersionDate": "19930506",
    "Amen

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Curr Json String ---> 
```json
{
  "Agreement": {
    "AgreementType": "Confirmation",
    "AgreementDate": "2013-02-15",
    "AmendmentDate": null,
    "AmendmentNumber": null,
    "AmendmentText": null,
    "AmendmentAppendix": null,
    "AmendmentAppendixText": null,
    "AmendmentAppendixAttachment": null,
    "AmendmentAppendixAttachmentText": null,
    "AmendmentAppendixAttachmentFile": null,
    "AmendmentAppendixAttachmentFileText": null,
    "AmendmentAppendixAttachmentFileBase64": null,
    "AmendmentAppendixAttachmentFileTextEncoding": null,
    "AmendmentAppendixAttachmentFileBase64Encoding": null,
    "AmendmentAppendixAttachmentFileMimeType": null,
    "AmendmentAppendixAttachmentFileExtension": null,
    "AmendmentAppendixAttachmentFileSize": null,
    "AmendmentAppendixAttachmentFile": null,
    "AmendmentAppendixAttachmentFile": null,
    "AmendmentAppendixAttachmentFile": null,
    "AmendmentAppendixAttachmentFile": null,
    "AmendmentAppendixAttachmentFile": null,
 

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Curr Json String ---> 
```json
{
  "Agreement": {
    "AgreementType": "Confirmation",
    "AgreementDate": "2013-02-15",
    "Parties": [
      {
        "PartyID": "PartyA",
        "PartyName": "Merrill Lynch International Bank Limited",
        "PartyType": "Buyer"
      },
      {
        "PartyID": "PartyB",
        "PartyName": "Party B",
        "PartyType": "Seller"
      }
    ],
    "MasterAgreement": {
      "MasterAgreementType": "ISDA Master Agreement",
      "MasterAgreementDate": "1993-05-06",
      "Amendments": [
        {
          "AmendmentType": "Amendment",
          "AmendmentDate": "2013-02-15"
        }
      ]
    },
    "Derivatives": [
      {
        "DerivativesType": "Currency Option",
        "TradeDate": "2013-02-15",
        "Currency": "CNY/USD",
        "OptionType": "Call",
        "OptionStyle": "European",
        "CallCurrency": "CNY",
        "CallAmount": "437500000.00",
        "PutCurrency": "USD",
        "PutAmount": "70000000.00",
       

In [26]:
from evaluate import *

cdm_schema_path = "../cdm_schema/"
cdm_repo_path = "../cdm_schema/cdm_schema_json"

cdms = []

for contract in contracts:

    # exclude = ['bofa_interest_rate_swap', 'bofa_equity_option', 'jpmorgan_interest_rate_swap']
    
    # if contract['name'] in exclude:
    #     continue
  
    cdm_file_path = f"test_data/generated_cdms/Llama-3.2-3B-Instruct/{contract['name']}_cdm.json"
    cdm_rag_file_path = f"test_data/generated_cdms/Llama-3.2-3B-Instruct/{contract['name']}_cdm_rag.json"

    if os.path.exists(cdm_file_path):
        with open(cdm_file_path, 'r') as cdm_file:
            cdm = json.load(cdm_file)

    if os.path.exists(cdm_rag_file_path):
        with open(cdm_rag_file_path, 'r') as cdm_rag_file:
            cdm_rag = json.load(cdm_rag_file)

    syntactic_correctness, schema_adherence = evaluate_cdm(cdm, cdm_schema_path, cdm_repo_path)
    rag_syntactic_correctness, rag_schema_adherence = evaluate_cdm(cdm_rag, cdm_schema_path, cdm_repo_path)

    semantic_coverage = get_coverage_score(get_coverage(llm_handler, contract['description'], cdm))
    rag_semantic_coverage = get_coverage_score(get_coverage(llm_handler, contract['description'], cdm_rag))

    print("\n", contract['name'])
    print("-----------------------\n")
    print(syntactic_correctness)
    print(schema_adherence)
    print("\n")
    print(rag_syntactic_correctness)
    print(rag_schema_adherence)
    print("\n")
    print(semantic_coverage)
    print(rag_semantic_coverage)
    print("\n")

    print("======================\n")
    # break

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 rbccapital_swap
-----------------------

{'total_json_keys': 46, 'total_correct_keys': 19, 'correct_percentage': 41.30434782608695, 'incorrect_keys': ['interestaccrual', 'interestpaymentdates', 'leadmanager', 'programme', 'rating', 'derivativescontract', 'maturitydate', 'floatingrateday', 'switchboard', 'accrualfrequency', 'interestpayment', "standard & poor's", 'followingfixedrateday', 'isincode', 'switchdate', 'issuedate', 'denominations', 'prospectus', 'coupon', 'issueamount', 'purpose', 'switchnoticeperiod', 'switchfeature', 'settlement', 'amendedtermsheet', 'facsimile', "moody's"]}
44.73684210526316


{'total_json_keys': 63, 'total_correct_keys': 63, 'correct_percentage': 100.0, 'incorrect_keys': []}
97.95918367346938


{'coverage_dict': {'matched_info': ['"contract: credit default swap",', '"seller: Party 1",', '"buyer: Party 2",', '"ISDA 2003 credit derivatives definition",', '"issuer: Royal Bank of Canada (Toronto Branch)",', '"rating: Aaa by Moody’s / AA- by Standard & Poor’

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 bofa_equity_swap_confirmation
-----------------------

{'total_json_keys': 37, 'total_correct_keys': 11, 'correct_percentage': 29.72972972972973, 'incorrect_keys': ['confirmation', 'designatedmaturity', 'officeofpartyb', 'derivativescontract', 'floatingrateday', 'equityamounts', 'flatcompounding', 'timeoftransaction', 'masterconfirmation', 'shortportfolio', 'reservetransaction', 'floatingamountpayer', 'principal', 'longportfolio', 'authorisation', 'numberofunits', 'floatingamounts', 'floatingrateoption', 'capitalisedterms', 'cashsettlementpaymentdate', 'floatingamountpaymentdates', 'officeofpartya', 'offices', 'initialprice', 'swapmasterconfirmationtransactiondocumentation', 'swapmasterconfirmation']}
43.10344827586207


{'total_json_keys': 65, 'total_correct_keys': 46, 'correct_percentage': 70.76923076923077, 'incorrect_keys': ['partya', 'designatedmaturity', 'officeofpartyb', 'derivativescontract', 'floatingrateday', 'flatcompounding', 'equityamounts', 'shortportfolio', 'floatingam

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 bofa_fx_option
-----------------------

{'total_json_keys': 37, 'total_correct_keys': 16, 'correct_percentage': 43.24324324324324, 'incorrect_keys': ['premium', 'putcurrency', 'callamount', 'amendments', 'offshoredeliverablecny', 'derivativestype', 'representation', 'amendmenttype', 'amendmentdate', 'additionaldisruptioneventprovisions', 'masteragreementdate', 'premiumpaymentdate', 'partytype', 'optionstyle', 'putamount', 'transaction', 'masteragreement', 'callcurrency', 'nonreliance', 'fallbackmatrix', 'derivatives']}
41.666666666666664


{'total_json_keys': 50, 'total_correct_keys': 40, 'correct_percentage': 80.0, 'incorrect_keys': ['premiumpaymentdate', 'foranoffshoredeliverablecnytransaction', 'premium', 'additioanaldisruptioneventprovisions', 'businessdaysforsettlementdate', 'nonreliance', 'timezone', 'putcurrencyandamount', 'callcurrencyandamount', 'isdaoffshoredeliverablecnytransactiondisruptionfallbackmatrix']}
95.45454545454545


{'coverage_dict': {'matched_info': ['"contrac