##### Requirements

* ~~Step 1 -> Extract the invoice header & line items for all invoices.~~
  * A -> Add custom fields (like Loan#) that may be not part of standard fields that are extracted from Invoice Model
  * B -> For duplicates, identify that as "Duplicate" and include all extracted metadata/attributes from that service request/invoice.
  ~~* C -> Store the output in "JSON" (same format as excel spreadsheet as far as columns)~~ & SQL
* Step 1a -> Use Layout Model and create the output in "Markdown" and persist that data and apply RAG pattern.
* Step 1b -> Build custom model (potentially 2) to extract the data from Service Request/Invoice

* ~~Step 2 -> Extract the metadata (at line level) from the "Ground truth" -> Excel/CSV file (Standard with "Advances" worksheet)~~

* Step 3 -> Reconciliation Process
  * A -> Reconcile Invoice line items (from Step 1) against Step 2 with various techniques (Fuzzy Description match across all duplicated invoice, ~~matching key metadata~~ - invoice date, payment date, ~~service date~~, ~~invoice#~~, amount, property address, description and vendor name) and based on the outcome, ~~generate the exception report for all line items not matching/missing from extracted data~~.  ~~Create the matching report as different file.~~
  * B -> Reconcile Invoice line items (from Step 2) against Step 1 with various techniques (Fuzzy Description match across all duplicated invoice, ~~matching key metadata~~ - invoice date, payment date, ~~service date~~, ~~invoice#~~, amount, description and vendor name) and based on the outcome, ~~append the mismatch to existing exception report for all line items not matching from extracted data~~. (with indication of Step2 mismatch)

* Step 3a -> LLM Reconciliation Process
  * ~~Build a LLM Prompt such that it can compare JSON/SQL data identifying the match and exception based on key metadata called out earlier.~~
               
* Stretch Goal -> Build semantic model from the data that is stored in relation database & run PBI CoPilot on the top of that


In [1]:
import logging
import azure.functions as func
import os
import requests
import urllib.parse
from datetime import datetime, timedelta
from azure.storage.blob import generate_container_sas
from azure.identity import ManagedIdentityCredential, AzureCliCredential, ChainedTokenCredential
import json
import base64
import tiktoken

In [2]:
# Import Python libraries
import os
import openai
from openai import OpenAI, AzureOpenAI, AsyncAzureOpenAI

In [4]:
import pandas as pd  
import json

client = AzureOpenAI(
                    api_key = os.getenv('OpenAiWestUsKey'),  
                    api_version = os.getenv('OpenAiVersion'),
                    azure_endpoint = os.getenv('OpenAiWestUsEp')
                    )

#### Step 1 - Convert our Ground truth Excel file into JSON Output

In [5]:
def convertToJsonSerializable(data):  
    if pd.isnull(data):  
        # Convert NaN or NaT to None  
        return None  
    elif isinstance(data, pd.Timestamp):  
        # Convert Timestamp to ISO 8601 format string  
        return data.date().isoformat()
    else:  
        # Return other data types unchanged  
        return data 

In [9]:
loanNumber = "4000835968"
dataDirectory = "../Data/Loan/" + loanNumber + "/"

In [11]:
excelFile = dataDirectory + loanNumber + ".xlsx"
outputJson = dataDirectory + "/Python/" + loanNumber + ".json"

# Load the Excel file  
xlsx = pd.ExcelFile(excelFile)  
  
# Create a dictionary to store the data from each sheet  
data = {}  
  
# Iterate through each worksheet in the Excel file  
for sheetName in xlsx.sheet_names:
    if sheetName != "Advances" and sheetName != "Summary":
        continue
    
    df = pd.read_excel(xlsx, sheetName)

    # Apply the conversion function to each cell in the DataFrame  
    df = df.applymap(convertToJsonSerializable)
      
    # Convert the DataFrame to a dictionary
    if sheetName == "Advances":
        data[sheetName] = df.to_dict(orient='records')
    #else:
    #    data[sheetName] = df.to_dict(orient='records')

# Convert the dictionary to a JSON string 
json_data = json.dumps(data, indent=4, ensure_ascii=False)  
  
# Optionally, you can save this JSON data to a file  
with open(outputJson, 'w') as json_file:  
    json_file.write(json_data)  
  
print(f'JSON file has been created: {outputJson}')

JSON file has been created: ../Data/Loan/4000835968//Python/4000835968.json


In [12]:
# import csv  
# import json  
  
# invoiceSample = "./Data/Invoice/0084518695.csv"
  
# # Replace 'output.json' with the desired path for the JSON output file  
# outputJson = './Data/Invoice/0084518695.json'  
  
# # Read the CSV and convert it to a dictionary  
# data = []  
# with open(invoiceSample, mode='r', encoding='utf-8') as csvFile:  
#     reader = csv.DictReader(csvFile)  
#     for row in reader:  
#         if any(field.strip() for field in row.values()):  # Check for non-blank rows  
#             data.append(row)
  
# # Write the dictionary to a JSON file  
# with open(outputJson, mode='w', encoding='utf-8') as jsonFile:  
#     json.dump(data, jsonFile, indent=4)  
  
# print(f'JSON file has been created: {outputJson}')  


#### Step 2 - Invoke the Document Intelligence - Invoice Pre-built Model including the Key-Value Pairs

In [13]:
import json
import time
from requests import get, post
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
import re
import ast


In [14]:
def replaceJsonPlaceHolders(json, values):
  # find all placeholders
  placeholders = re.findall('<[\w ]+>', json)
  assert len(placeholders) == len(values), "Please enter the values of all placeholders."

  # replaces all placeholders with values
  for k, v in values.items():
      placeholder = "<%s>" % k
      json = json.replace(placeholder, v)

  return json

In [15]:
def replaceJsonPlaceHolder(json, values):
  # replaces all placeholders with values
  for k, v in values.items():
      placeholder = "<%s>" % k
      json = json.replace(placeholder, str(v))

  return json

In [16]:
class DateEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, datetime.date):
            return obj.isoformat()
        return super().default(obj)

In [18]:
sampleDocs = []
# sampleDocs = [
#     './Data/Invoice/4000835968/CORPADV/CORPADV_451102775.pdf',
#     './Data/Invoice/4000835968/CORPADV/CORPADV_451102776.pdf',
#     './Data/Invoice/4000835968/CORPADV/CORPADV_458680339.pdf',
#     './Data/Invoice/4000835968/CORPADV/CORPADV_458680340.pdf',
#     './Data/Invoice/4000835968/CORPADV/CORPADV_462460604.pdf',
#     './Data/Invoice/4000835968/CORPADV/CORPADV_462460605.pdf',
#     './Data/Invoice/4000835968/CORPADV/CORPADV_467091874.pdf',
#     './Data/Invoice/4000835968/CORPADV/CORPADV_467091875.pdf',
#     './Data/Invoice/4000835968/CORPADV/CORPADV_467091876.pdf',
# ]
# sampleDocs = [
#     './Data/Invoice/4000835968/CORPADV/CORPADV_451102775.pdf',
# ]
sampleOutputDocs = []
sourcePath = dataDirectory + "CORPADV/"
destinationPath = dataDirectory + "Python/"
for file in os.listdir(sourcePath):
    if file.endswith(".pdf"):
        sampleDocs.append(sourcePath + file)

for file in os.listdir(destinationPath):
    if file.endswith(".json"):
        sampleOutputDocs.append(destinationPath + file)

In [None]:
extractedData = []
templateStructure = '{"Payment Date":"<Payment_Date>","Service Date":"<Service_Date>", "Expense Type": "","Additional Expense Comments":"<Additional_Expense_Comments>","Expense Description": "<Expense_Description>","Amount Paid": "<Amount_Paid>","Amount Claimed": "","Amount Not Claimed": "","Unclaimed Amount Reason": "","Vendor Name": "<Vendor_Name>","Invoice Number": "<Invoice_Number>","Fee Type Code": "","Recovery Type": "","Actual Recovery Code": "","Expense Code": "","Fee Reference Comments": "","File Name": "","Document Available": "","Notes": "<Notes>"}'
docIntelligenceEndPoint = os.getenv('FormRecognizerEndPoint')
docIntelligenceKey = os.getenv('FormRecognizerKey')
docAnalysisClient = DocumentAnalysisClient(
        endpoint=docIntelligenceEndPoint, credential=AzureKeyCredential(docIntelligenceKey))

output = {"Advances": []}
for sampleDoc in sampleDocs:
    fileName = os.path.basename(sampleDoc).replace(".pdf", ".json")
    # Check if we already have ran the analysis
    #if os.path.exists(sampleDoc.replace(".pdf", ".json")):
    if os.path.exists(destinationPath + fileName):
        print("--------Process Already analyzed Invoice: ", sampleDoc)
        #with open(sampleDoc.replace(".pdf", ".json"), "r") as f:
        with open(destinationPath + fileName, "r") as f:
            invoices = json.load(f)
        paymentDate = ''
        serviceDate = ''
        invoiceNbr = ''
        vendorName = ''
        expenseDesc = ''
        additionalExpenseComments = ''
        amountPaid = ''
        notes = ''
        for idx, invoice in enumerate(invoices["documents"]):
            for name, field in invoice["fields"].items():
                if name != "Items":
                    if name == "VendorName":
                        vendorName = field["content"]
                    if name == "InvoiceId":
                        invoiceNbr = field["content"]
                    if name == "CustomerAddress":
                        notes = field["content"]

                    #print("...{}: {} has confidence {}".format(name, field.content, field.confidence))

            for idx, item in enumerate(invoice["fields"].get("Items").get("value")):
                print("...Item #{}".format(idx))
                for name, field in item["value"].items():
                    if name == "Amount" and field["value_type"] == "currency":
                        try:
                            amountPaid = field["value"]["amount"]
                        except:
                            amountPaid = field["content"]
                    if name == "Date" and field["value_type"] == "date":
                        try:
                            serviceDate = field["value"]
                        except:
                            serviceDate = field["content"]
                        print(serviceDate)
                    if name == "Description":
                        expenseDesc = field["content"]

                    #print("......{}: {} has confidence {}".format(name, field.content, field.confidence))

        for idx, kv in enumerate(invoices["key_value_pairs"]):
            if (kv["value"] != None):
                if (kv["key"]["content"] and kv["value"]["content"]):
                    if kv["key"]["content"] == "PaymentDate":
                        paymentDate = kv["value"]["content"]
                    #print("Key...{}: Value...{}".format(kv.key.content, kv.value.content))
        
        values = {'Service_Date':serviceDate,'Amount_Paid': amountPaid, 'Vendor_Name': vendorName, 
                  'Invoice_Number': invoiceNbr.removeprefix("60"), 'Expense_Description': expenseDesc, 'Additional_Expense_Comments': additionalExpenseComments, 'Notes': notes}
        print(values)
        filledItem = replaceJsonPlaceHolder(templateStructure,values)
        extractedData.append(ast.literal_eval(json.dumps(filledItem)))
        continue
    else:
        print("--------Process Invoice: ", sampleDoc)
        with open(sampleDoc, "rb") as f:
            poller = docAnalysisClient.begin_analyze_document(
                "prebuilt-invoice", document=f, locale="en-US",
                pages="1", features=["keyValuePairs"]
            )
        invoices = poller.result()
        with open(destinationPath + fileName, "w") as f:
            json.dump(invoices.to_dict(), f, indent=4, default=str)
        paymentDate = ''
        serviceDate = ''
        invoiceNbr = ''
        vendorName = ''
        expenseDesc = ''
        additionalExpenseComments = ''
        amountPaid = ''
        notes = ''
        for idx, invoice in enumerate(invoices.documents):
            for name, field in invoice.fields.items():
                if name != "Items":
                    if name == "VendorName":
                        vendorName = field.content
                    if name == "InvoiceId":
                        invoiceNbr = field.content
                    if name == "CustomerAddress":
                        notes = field.content

                    #print("...{}: {} has confidence {}".format(name, field.content, field.confidence))

            for idx, item in enumerate(invoice.fields.get("Items").value):
                #print("...Item #{}".format(idx))
                for name, field in item.value.items():
                    if name == "Amount" and field.value_type == "currency":
                        try:
                            amountPaid = field.value.amount
                        except:
                            amountPaid = field.content
                    if name == "Date" and field.value_type == "date":
                        try:
                            serviceDate = field.value
                        except:
                            serviceDate = field.content
                    if name == "Description":
                        expenseDesc = field.content

                    #print("......{}: {} has confidence {}".format(name, field.content, field.confidence))

        for idx, kv in enumerate(invoices.key_value_pairs):
            if (kv.value != None):
                if (kv.key.content and kv.value.content):
                    if kv.key.content == "PaymentDate":
                        paymentDate = kv.value.content
                    #print("Key...{}: Value...{}".format(kv.key.content, kv.value.content))
        
        values = {'Service_Date':serviceDate,'Amount_Paid': amountPaid, 'Vendor_Name': vendorName, 
                'Invoice_Number': invoiceNbr.removeprefix("60"), 'Expense_Description': expenseDesc, 'Additional_Expense_Comments': additionalExpenseComments, 'Notes': notes}
        print(values)
        filledItem = replaceJsonPlaceHolder(templateStructure,values)
        extractedData.append(ast.literal_eval(json.dumps(filledItem)))

output["Advances"] = extractedData


In [20]:
updatedOutput = json.dumps(output, indent=4, ensure_ascii=False)
updatedOutput = updatedOutput.replace("\"{", "{")
updatedOutput = updatedOutput.replace("}\"", "}")
updatedOutput = updatedOutput.replace("\\", "")
processedOutputJson = dataDirectory + "Python/" + loanNumber + "_FrOut.json"
# Optionally, you can save this JSON data to a file  
with open(processedOutputJson, 'w') as json_file:  
    json_file.write(updatedOutput)

#### STEP 3A - Compare the Ground Truth JSON with the Document Intelligence JSON Output - Using "Python" Code
##### Ensure that the output JSON is in the same format as the Ground Truth JSON for both Matching and Non-Matching Rows

In [21]:
# Function to load and parse a JSON file  
def loadJson(file_path):
    with open(file_path, 'r') as file:  
        return json.load(file)

In [22]:
# Function to compare two lists of dictionaries based on key fields  
def compareJsonArray(jsonArray1, jsonArray2, keyFields):  
    matchingOutput = {"Advances": []}
    nonMatchingOutput = {"Advances": []}
    matchingObjects = []  
    nonMatchingObjects = []  
  
    # Convert the second JSON array to a dictionary for faster lookup  
    json_dict2 = {tuple(item[key] for key in keyFields): item for item in jsonArray2}  
  
    # Iterate through the first JSON array and compare  
    for item1 in jsonArray1:  
        key = tuple(item1[key] for key in keyFields)  
        item2 = json_dict2.get(key)  
        if item2:  
            # If a matching object is found based on key fields, store it  
            #matchingObjects.append({'object1': item1, 'object2': item2})
            matchingObjects.append(item1)
        else:  
            # If no matching object is found, store the non-matching object from the first array  
            nonMatchingObjects.append(item1)  
  
    # Also check for any objects in the second array that didn't match any in the first  
    for item2 in jsonArray2:  
        key = tuple(item2[key] for key in keyFields)  
        if key not in json_dict2:  
            nonMatchingObjects.append(item2)  
  
    matchingOutput["Advances"] = matchingObjects
    nonMatchingOutput["Advances"] = nonMatchingObjects
    return matchingOutput, nonMatchingOutput  

In [24]:
jsonArray1 = loadJson(dataDirectory + "Python/" + loanNumber + ".json")
jsonArray2 = loadJson(dataDirectory + "Python/" + loanNumber + "_FrOut.json")

In [25]:
matching, nonMatching = compareJsonArray(jsonArray1['Advances'], jsonArray2['Advances'], 
                 ['Invoice Number', "Service Date"])
# Output the results  
#print("Matching Objects:")  
#print(json.dumps(matching, indent=4))  
#print("\nNon-Matching Objects:")  
#print(json.dumps(nonMatching, indent=4))  

In [26]:
### Incase if required to find duplicates

# import hashlib

# matchingFiltered = []
# md5List = []

# for item in matching["Advances"]:
#     md5Result = hashlib.md5(json.dumps(item, separators=(',', ':')).encode("utf-8")).hexdigest()
#     if md5Result not in md5List:
#         md5List.append(md5Result)
#         matchingFiltered.append(item)

In [27]:
matchingOutputJson = dataDirectory + "Python/" + loanNumber + "_FrMatching.json"
nonMatchingOutputJson = dataDirectory + "Python/" + loanNumber + "_FrNonMatching.json"
# Optionally, you can save this JSON data to a file  
with open(matchingOutputJson, 'w') as json_file:
    json_file.write(json.dumps(matching, indent=4))

with open(nonMatchingOutputJson, 'w') as json_file:
    json_file.write(json.dumps(nonMatching, indent=4))

#### STEP 3b - Invoke LLM and build Custom Prompt that can be used to Compare the Ground Truth JSON with the Document Intelligence JSON Output

In [28]:
def truncateToken(string: str, encoding_name: str, max_length: int = 128000) -> str:
    """Truncates a text string based on max number of tokens."""
    encoding = tiktoken.encoding_for_model(encoding_name)
    encoded_string = encoding.encode(string)
    num_tokens = len(encoded_string)

    if num_tokens > max_length:
        string = encoding.decode(encoded_string[:max_length])

    return string

def getMessagesFromHistory(systemPrompt: str, userConv: str):
        #messageBuilder = MessageBuilder(systemPrompt, modelId)
        messages = []
        messages.append({'role': 'system', 'content': systemPrompt})
        userContent = truncateToken(string=userConv, encoding_name="cl100k_base", max_length=128000)
        messages.append({'role': "user", 'content': userContent})

In [29]:
# systemTemplate = """Compare the provided JSON data arrays and determine the matching and non-matching rows based on key fields. Display the JSON output for both the matching and non-matching objects.
#                     Context:
#                     You have been given two JSON data arrays, Array A and Array B. Your task is to compare these arrays and identify the rows that match and do not match based on key fields. 
#                     The key fields in the JSON data arrays are "Invoice Number", "Service Date".
                    
#                     Instructions:
#                     Compare Array A and Array B based on specified key fields.
#                     Identify and display the matching rows in JSON format.
#                     Identify and display the non-matching rows in JSON format.
#                     Ensure that the output clearly indicates whether a row is matching or non-matching.
                    
#                     Do not include any explanations, only provide a  RFC8259 compliant JSON response following this format without deviation.
#                     [{
#                         "Payment Date": "Payment_Date",
#                         "Service Date": "2022-02-10",
#                         "Expense Type": "Expense_Type",
#                         "Additional Expense Comments": "Comments here",
#                         "Expense Description": "Description here",
#                         "Amount Paid": "Amount here",
#                         "Amount Claimed": "Amount Claimed here",
#                         "Amount Not Claimed": "Amount Not Claimed here",
#                         "Unclaimed Amount Reason": "",
#                         "Vendor Name": "Vendor Name here",
#                         "Invoice Number": "Invoice Number here",
#                         "Fee Type Code": "",
#                         "Recovery Type": "",
#                         "Actual Recovery Code": "",
#                         "Expense Code": "",
#                         "Fee Reference Comments": "",
#                         "File Name": "",
#                         "Document Available": "",
#                         "Notes": "Notes here"
#                         }]
#                         """

#### Run the LLM to compare two JSON data arrays

In [30]:
systemTemplate = """Compare the provided JSON data arrays and determine the matching rows based on key fields. Display the JSON output for the matching objects.
                    Context:
                    You have been given two JSON data arrays, Array A and Array B. Your task is to compare these arrays and identify the rows that match and do not match based on key fields. 
                    The key fields in the JSON data arrays are "Invoice Number", "Service Date".
                    
                    Instructions:
                    Compare Array A and Array B based on specified key fields.
                    Identify and display the matching rows in JSON format.
                    
                    Do not include any explanations, only provide a  RFC8259 compliant JSON response following this format without deviation.
                    {
                        "Advances":[{
                            "Payment Date": "Payment_Date",
                            "Service Date": "2022-02-10",
                            "Expense Type": "Expense_Type",
                            "Additional Expense Comments": "Comments here",
                            "Expense Description": "Description here",
                            "Amount Paid": "Amount here",
                            "Amount Claimed": "Amount Claimed here",
                            "Amount Not Claimed": "Amount Not Claimed here",
                            "Unclaimed Amount Reason": "",
                            "Vendor Name": "Vendor Name here",
                            "Invoice Number": "Invoice Number here",
                            "Fee Type Code": "",
                            "Recovery Type": "",
                            "Actual Recovery Code": "",
                            "Expense Code": "",
                            "Fee Reference Comments": "",
                            "File Name": "",
                            "Document Available": "",
                            "Notes": "Notes here"
                        }]
                    }
                    """

#### Because of the limitation on 4096 Tokens on the completion, let's break down the steps into multiple chunks

In [32]:
chunks = (len(jsonArray1['Advances']) - 1) // 5 + 1
for i in range(chunks):
     processedOutputJson = dataDirectory + "Python/" + loanNumber + "_LlmMatching" + str(i) + ".json"
     if (os.path.exists(processedOutputJson)):
          continue
     chunkedJsonArray1 = {"Advances": []}
     batch = jsonArray1['Advances'][i*5:(i+1)*5]
     chunkedJsonArray1["Advances"] = batch
     #print(chunkedJsonArray1)
     content = """Array A:{arrayA}
            Array B:{arrayB}
            """
     content = content.format(arrayA=json.dumps(chunkedJsonArray1, indent=4), arrayB=json.dumps(jsonArray2, indent=4))

     messages = []
     messages.append({'role': 'system', 'content': systemTemplate})
     #userContent = truncateToken(string=content, encoding_name="gpt-4-1106-preview", max_length=75000)
     userContent = content
     messages.append({'role': "user", 'content': userContent})

     completion = client.chat.completions.create(
          model=os.getenv('OpenAiGpt4Turbo'), 
          messages=messages,
          temperature=0,
          top_p=0,
          max_tokens=4096,
          n=1)
     answer = completion.choices[0].message.content
     answer = re.sub('\n', '', answer)
     answer = re.sub('```json', '', answer)
     answer = re.sub('```', '', answer)

     llmAnswer = json.loads(answer)
     # Optionally, you can save this JSON data to a file  
     with open(processedOutputJson, 'w') as json_file:  
          json_file.write(json.dumps(llmAnswer, indent=4))

In [33]:
# Combine all our LlmMatching Outputs to a single file
llmMatchingOutput = {"Advances": []}
for i in range(chunks):
     with open(dataDirectory + "Python/" + loanNumber + "_LlmMatching" + str(i) + ".json", "r") as f:
          llmMatchingOutput["Advances"].extend(json.load(f)["Advances"])

processedOutputJson = dataDirectory + "Python/" + loanNumber + "_LlmMatching.json"
# Optionally, you can save this JSON data to a file  
with open(processedOutputJson, 'w') as json_file:  
    json_file.write(json.dumps(llmMatchingOutput, indent=4))

#### Run LLM to find non-matching rows between two JSON data arrays

In [34]:
systemTemplate = """Compare the provided JSON data arrays and determine the non-matching rows based on key fields. Display the JSON output for the non-matching objects.
                    Context:
                    You have been given two JSON data arrays, Array A and Array B. Your task is to compare these arrays and identify the rows that do not match and do not match based on key fields. 
                    The key fields in the JSON data arrays are "Invoice Number", "Service Date".
                    
                    Instructions:
                    Compare Array A and Array B based on specified key fields.
                    Identify and display the non-matching rows in JSON format.
                    
                    Do not include any explanations, only provide a  RFC8259 compliant JSON response following this format without deviation.
                    {
                        "Advances":[{
                            "Payment Date": "Payment_Date",
                            "Service Date": "2022-02-10",
                            "Expense Type": "Expense_Type",
                            "Additional Expense Comments": "Comments here",
                            "Expense Description": "Description here",
                            "Amount Paid": "Amount here",
                            "Amount Claimed": "Amount Claimed here",
                            "Amount Not Claimed": "Amount Not Claimed here",
                            "Unclaimed Amount Reason": "",
                            "Vendor Name": "Vendor Name here",
                            "Invoice Number": "Invoice Number here",
                            "Fee Type Code": "",
                            "Recovery Type": "",
                            "Actual Recovery Code": "",
                            "Expense Code": "",
                            "Fee Reference Comments": "",
                            "File Name": "",
                            "Document Available": "",
                            "Notes": "Notes here"
                        }]
                    }
                    """

#### Statistics

In [35]:
excelData = dataDirectory + "Python/" + loanNumber + ".json"
FrData = dataDirectory + "Python/" + loanNumber + "_FrOut.json"
FrMatching = dataDirectory + "Python/" + loanNumber + "_FrMatching.json"
FrNoMatching = dataDirectory + "Python/" + loanNumber + "_FrNonMatching.json"
LlmMatching = dataDirectory + "Python/" + loanNumber + "_LlmMatching.json"
LlmNoMatching = dataDirectory + "Python/" + loanNumber + "_LlmNonMatching.json"

with open(excelData, 'r') as file:
    excelData = json.load(file)
    print("Total Excel Data: ", len(excelData['Advances']))

with open(FrData, 'r') as file:
    FrData = json.load(file)
    print("Total FrData: ", len(FrData['Advances']))

with open(FrMatching, 'r') as file:
    FrMatching = json.load(file)
    print("Total FrMatching: ", len(FrMatching['Advances']))

with open(FrNoMatching, 'r') as file:
    FrNoMatching = json.load(file)
    print("Total FrNoMatching: ", len(FrNoMatching['Advances']))

with open(LlmMatching, 'r') as file:
    LlmMatching = json.load(file)
    print("Total LlmMatching: ", len(LlmMatching['Advances']))

# with open(LlmNoMatching, 'r') as file:
#     LlmNoMatching = json.load(file)
#     print("Total LlmNoMatching: ", len(LlmNoMatching['Advances']))


Total Excel Data:  63
Total FrData:  52
Total FrMatching:  23
Total FrNoMatching:  40
Total LlmMatching:  29
