#### Imports, setting environment and GPT API key

In [10]:
import os
from dotenv import load_dotenv #library to read the environment variables
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader #there are a bunch of loaders in langchain library
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import DocArrayInMemorySearch
from operator import itemgetter

#load the environment variables (python-dotenv) before any of them is read
load_dotenv()

#OpenAI API key taken from the environment; None when the variable is not set
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

#### Functions to generate the model and the responses

In [62]:
def changes_detector_model(MODEL, question):
    """
    Asks an LLM which changes a CFTC report requires according to the specification updates.

    The specification updates are hard-coded in the prompt template below; the report is
    substituted into the template's {question} placeholder.

    Parameters:
    - MODEL (str): The name of the AI model to use. Names starting with "gpt" are sent to OpenAI
      through ChatOpenAI (e.g., "gpt-3.5-turbo"); any other name is handed to Ollama.
    - question (str): The report to analyse (in this notebook, the raw text of the report JSON file).

    Returns:
    - The result of invoking the prompt | model | parser chain, i.e. the model's answer after
      StrOutputParser. The prompt asks for a "Required changes:" list of instructions.
    """
    #sets the model (no embeddings client is built: this chain performs no retrieval)
    if MODEL.startswith("gpt"):
        model = ChatOpenAI(api_key=OPENAI_API_KEY, model=MODEL)
    else:
        model = Ollama(model=MODEL)

    #parser
    parser = StrOutputParser()

    #prompt
    template = """ 
    Given a the updates of the new CFTC technical specification and a CFTC Report, you will detect which changes are required in the report according to the specification update. You will return the changes as an instruction for another GPT to apply them. Here you have a response example:
    "Required changes:
    - Field #3 - UTI: replace it so that it is populated with the internal ID instead of the external.
    - Field #4 - Event Timestamp: populate it with the Timestamp format instead of Date format."

    Do not apply additional validations, just focus on the changes estrictly coming from the specification update. Do not include any refference to the fields were no changes are needed for the affected report.

    Updates:

    - [#2 - Central Counterparty] field: optional to be populated if the field [#1 - Cleared] is populated with "I" (before it was not required).

    - [#36 - Call amount] and [#38 - Put amount]: if it's an option, then at least one of the fields is required: [#36 - Call amount] or [#38 - Put amount] (before both were required).

    - [#116 - Initial margin collateral portfolio code] and [#124 - Variation margin collateral portfolio code]: Optional to be populated (before they were mandatory).

    - [#39 - Put Currency]: it can only be populated with EUR.
    - [Execution Timestamp]: field renamed to "Excution Date" and is populated with the execution date instead of the timestamp.

    Report: {question}
    """
    prompt = PromptTemplate.from_template(template) #takes the prompt

    #final chain: fill the prompt, call the model, parse its answer
    chain = prompt | model | parser
    return chain.invoke({"question": question})


def changes_application_model(MODEL, question, sample, changes):
    """
    Asks an LLM to apply a list of changes to a CFTC report and return the updated report.

    The three inputs are substituted into the prompt template below, which instructs the model
    to return only the updated report object.

    Parameters:
    - MODEL (str): The name of the AI model to use. Names starting with "gpt" are sent to OpenAI
      through ChatOpenAI (e.g., "gpt-3.5-turbo"); any other name is handed to Ollama.
    - question (str): The original report, placed in the {question} placeholder.
    - sample (str): The CDM sample the report was generated from, placed in the {sample} placeholder.
    - changes (str): The changes to be applied, placed in the {changes} placeholder.

    Returns:
    - The result of invoking the prompt | model | parser chain, i.e. the model's answer after
      StrOutputParser.
    """
    #sets the model (no embeddings client is built: this chain performs no retrieval)
    if MODEL.startswith("gpt"):
        model = ChatOpenAI(api_key=OPENAI_API_KEY, model=MODEL)
    else:
        model = Ollama(model=MODEL)

    #parser
    parser = StrOutputParser()

    #prompt
    template = """ 
    Given a the changes to be done to the CFTC report and the CDM TradeState it was built from, return the report object whith the changes applied.
    If necessary, re-extract information from the original CDM Sample where the report was generated from.
    Just return the report object updated, don't include any additional word.
    
    Original report: {question}
    CDM Sample: {sample}

    Changes to be applied: {changes}
    """
    prompt = PromptTemplate.from_template(template) #takes the prompt

    #final chain: fill the prompt, call the model, parse its answer
    chain = prompt | model | parser
    return chain.invoke({"question": question, "changes": changes, "sample": sample})



def regulatory_updates_applicator(report, sample, MODEL="gpt-3.5-turbo"):
    """
    Detects the changes a report requires and applies them, using two chained LLM calls.

    First changes_detector_model is asked which changes the report needs; the detected changes
    are printed and then passed, together with the report and the sample, to
    changes_application_model.

    Parameters:
    - report (str): The report to update (in this notebook, the raw text of the report JSON file).
    - sample (str): The CDM sample the report was generated from.
    - MODEL (str): The name of the AI model used for both calls. Defaults to "gpt-3.5-turbo".

    Returns:
    - The value returned by changes_application_model: the model's answer containing the updated report.
    """
    changes = changes_detector_model(MODEL, report)
    print ("changes", changes)
    return changes_application_model(MODEL, report, sample, changes)

#### Application Example

In [63]:
#loading report
report_1_path = "reports/IR-Option-Swaption-ex01-Bermuda-report.json"
with open(report_1_path, 'r') as file:
    report_1 = file.read()


#loading sample (read from sample_1_path; it was previously read from report_1_path by mistake)
sample_1_path = "cdm-samples/IR-Option-Swaption-ex01-Bermuda.json"
with open(sample_1_path, 'r') as file:
    sample_1 = file.read()


In [64]:
#single sample: detect the changes required for report_1 and apply them, passing sample_1 as the CDM sample
response_single = regulatory_updates_applicator(report_1, sample_1)
print(response_single)

changes Required changes:
- Field #36 - Call amount: at least one of the fields [#36 - Call amount] or [#38 - Put amount] is required.
- Field #124 - Variation margin collateral portfolio code: now optional to be populated. 
- Field #39 - Put Currency: populate it with EUR.
- Field [Execution Timestamp] - rename it to "Excution Date" and populate it with the execution date instead of the timestamp.
{
  "actionType" : "NEWT",
  "allocationIndicator" : "UNAL",
  "blockTradeElectionIndicator" : false,
  "buyerIdentifier" : "213800WWTABZ1GOJHH37",
  "cftcGuidebook" : {
    "assetClass" : "InterestRate",
    "maturityDateOfTheUnderlier" : "2032-01-13",
    "messageType" : "Trade State",
    "optionStyle" : "BERM",
    "optionType" : "CALL",
    "physicalCommodityIndicator" : false,
    "sefOrDcmAnonymousExecutionIndicator" : false,
    "sefOrDcmIndicator" : false
  },
  "cleared" : "N",
  "counterparty1" : "213800WWTABZ1GOJHH37",
  "counterparty1FederalEntityIndicator" : false,
  "counterpa