#### Imports, environment setup, and OpenAI API key

In [44]:
import os
from dotenv import load_dotenv #library to read the environment variables
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader #there are a bunch of loaders in langchain library
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import DocArrayInMemorySearch
from operator import itemgetter

# Load variables defined in a .env file (if any) into the process environment
load_dotenv()

# OpenAI credential taken from the environment; None when the variable is not set
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

#### Functions to build the chain and generate the responses

In [45]:
def chain_generator(MODEL, document=None):
    """
    Builds a LangChain chain that asks a model which regulators a CDM TradeState must be reported to.

    The chain is ``prompt | model | parser``: the prompt template is filled with the ``question``
    input, sent to the selected model, and the model output is passed through StrOutputParser.
    No document loading, embedding or retrieval step is part of this chain.

    Parameters:
    - MODEL (str): The name of the AI model to use. Names starting with "gpt" (e.g., "gpt-3.5-turbo")
      are served through ChatOpenAI with OPENAI_API_KEY; any other name is passed to Ollama.
    - document: Currently unused. Kept (default None) so existing calls that pass it keep working.

    Returns:
    - chain (Chain): The composed ``prompt | model | parser`` pipeline. Invoke it with a mapping
      that has a "question" key holding the CDM TradeState text.
    """
    #sets the model; only the model name (and API key for OpenAI) is passed,
    #so every other model option is left at the library default
    if MODEL.startswith("gpt"):
        model = ChatOpenAI(api_key=OPENAI_API_KEY, model=MODEL)
    else:
        model = Ollama(model=MODEL)

    print("Setting up parser and prompt template...")
    #parser
    parser = StrOutputParser()

    #prompt: the single placeholder {question} receives the TradeState text
    template = """
    Objective: Given a CDM (Common Domain Model) TradeState, 
            determine which of the following regulatory bodies the transaction must be reported to:
            - CFTC (Commodity Futures Trading Commission)
            - EMIR (European Market Infrastructure Regulation)
            - JFSA (Japan Financial Services Agency)
            - UKEMIR (UK European Market Infrastructure Regulation)
            - MAS (Monetary Authority of Singapore)
            - ASIC (Australian Securities and Investments Commission)

            Output:
            Just enumerate the regulatory bodies which are applicable. Do not include any other word (e.g. "CFTC, EMIR, JFSA").
            If no regulatory bodies apply for this trade transaction return "None".

    CDM TradeState: {question}
    """
    prompt = PromptTemplate.from_template(template) #takes the prompt

    print("Building the final chain...")
    #final chain
    chain = (
        prompt
        | model
        | parser
    )
    print("Chain completed!")
    print("===========================================================")
    return chain

def get_answers(chain, questions):
    """
    Runs every question through the chain and collects the results under numbered keys.

    Parameters:
    - chain (Chain): Object exposing ``invoke``; it is called once per question with
      ``{"question": <question>}``.
    - questions (list of str): The inputs to submit, processed in the order given.

    Returns:
    - dict: Maps 'sample-1', 'sample-2', ... (1-based, in input order) to what the chain
      returned for the corresponding question.
    """
    return {
        f"sample-{position}": chain.invoke({"question": text})
        for position, text in enumerate(questions, start=1)
    }



def eligibility_detector(sample_paths, model="gpt-4o"):
    """
    Opens the files and applies the LLM to return the jurisdictions where each transaction needs to be reported.

    Parameters:
    - sample_paths: A single path (str or os.PathLike, e.g. pathlib.Path) or an iterable of such
      paths pointing to the CDM samples representing the transactions.
    - model (str): Model name forwarded to chain_generator. Defaults to "gpt-4o".

    Returns:
     - dict: A dictionary where keys are 'sample-1', 'sample-2', etc., and values are the corresponding answers.
    """
    # Single cardinality case: wrap a lone path so the loop below handles both forms.
    # isinstance (rather than an exact type comparison) also accepts str subclasses and Path objects.
    if isinstance(sample_paths, (str, os.PathLike)):
        sample_paths = [sample_paths]
    # Read each JSON file's content as a string. The encoding is pinned to UTF-8 so the
    # result does not depend on the platform's default text encoding.
    sample_json = []
    for sample_path in sample_paths:
        with open(str(sample_path), 'r', encoding='utf-8') as file:
            sample_json.append(file.read())
    return get_answers(chain_generator(model), sample_json)

#### Application Example

In [46]:
# Multiple samples: pass a list of paths; answers come back keyed 'sample-1', 'sample-2', ... in list order
response_multiple = eligibility_detector(
    [
        'cdm-samples/credit-index-abx-abxhe.json',
        'cdm-samples/credit-indextranche-abx-abxtranche.json',
    ]
)
response_multiple


Setting up parser and prompt template...
Building the final chain...
Chain completed!


{'sample-1': 'CFTC, EMIR, UKEMIR', 'sample-2': 'CFTC, EMIR'}

In [47]:
# Single sample: a bare string path is accepted in place of a list
response_single = eligibility_detector(
    'cdm-samples/credit-index-abx-abxhe.json'
)
response_single

Setting up parser and prompt template...
Building the final chain...
Chain completed!


{'sample-1': 'CFTC'}