In [1913]:
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential
import os
from dotenv import load_dotenv
from typing import Optional

from langchain.chains.openai_functions import (
    create_openai_fn_chain,
    create_structured_output_chain,
)
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import HumanMessage, SystemMessage
load_dotenv()


True

# Extracting information using form recognizer


In [1914]:
endpoint = os.environ["FORM_RECOGNIZER_ENDPOINT"]
key = os.environ["FORM_RECOGNIZER_KEY"]

credential = AzureKeyCredential(key)
client = DocumentAnalysisClient(endpoint, credential)

# Submit the document for analysis
with open(r"C:\Users\ashbhand\Desktop\openai-invoice-demo-app-1\data\Care Payment Invoice examples .pdf", "rb") as f:
    # print(f)
    poller = client.begin_analyze_document("prebuilt-layout",document =f)

# Wait for analysis to complete
result = poller.result()

In [1915]:
raw_content_lst = []
raw_content = ""
for page in result.pages:
    for line in page.lines:
        raw_content += line.content + "\n"
        raw_content_lst.append(line.content)

In [1916]:
raw_content

'For Information Only: Do Not Pay\nLeaders in Consumer Directed Care\n"We Are Your Someone When Your Someone Is Not There"\nUnit 1/249 Coronation Drive\nMilton QLD 4064\np: 3356 2563\ne: accountsreceivable@angelsinaprons.com.au\nA.B.N.\n89 169 762 476\nTax Invoice\nInvoice No.:\n00331573\nDate:\n29/03/2023\nPO #:\nEllen Erba\nAdress\nPAYMENT TERMS\n7 DAYS FROM\nINVOICE DATE.\nDATE\nUNITS\nNOTES\nRATE\nTOTAL(ex-GST)\n14/03/2023\n2\nHour\nName Brokerage M-F\n15:30pm-17:30pm\n69.00\n$138.00\n16/03/2023\n2\nHour\nName Brokerage M-F\n15:30pm-17:30pm\n69.00\n$138.00\n18/03/2023\n2\nHour\nName Brokerage Saturday\n15:30pm-17:30pm\n89.30\n$178.60\nThank You, I hope you have been pleased with\nour service, and if there is anything more we can\ndo please do not hesitate to call us.\nSubtotal:\n$454.60\nGST:\n$45.46\nTOTAL:\n$500.06\nPaid to Date:\n$0.00\nBalance Due:\n$500.06\n-----------------------------------------------------------------------------------------------------------------------\n

# using LLM to extract information from invoices 

In [1917]:
sample_json_format = "{ \"line_items\":\"\",\n }"

In [1918]:
json_schema = {
    "title": "id",
    "description": "Extract information from invoice",
    "type": "object",
    "properties": {
        "id": {"title": "ID", "description": "assign a random UUID as value to id key", "type": "string"},
        "name_of_the_company": {"title": "name_of_the_company", "description": "Identify the name of the company", "type": "string"},
        "address": {"title": "address", "description": "Identify the address of the company", "type": "string"},
        "issued_to": {"title": "issued_to", "description": "which company was the invoice issued to", "type": "string"},
        "total_amount": {"title": "total_amount", "description": "What is the total amount of the invoice", "type": "string"},
        "balance": {"title": "balance", "description": "What is the balance amount of the invoice", "type": "string"},
        "invoice_number": {"title": "invoice_number", "description": "what is the invoice number", "type": "string"},
        "invoice_date": {"title": "invoice_date", "description": "what is the date the invoice issued", "type": "string"},
        "shipment_details": {"title": "shipment_details", "description": "Please mentions the details of the shipment", "type": "string"},
        "line_items": {"title": "line_items", "description": "what are line items mentioned and please follow the {sample_json_format} format", "type": "string"},
        "payment_terms": {"title": "payment_terms", "description": "what is the payment terms mentioned", "type": "string"}
    },
    "required": ["id", "name_of_the_company","address","issued_to","total_amount","balance","invoice_number","invoice_date","shipment_details","line_items","payment_terms"],
}

In [1919]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a world class algorithm for extracting information in structured formats."),
        ("human", "Use the given format to extract information from the following input: {input}"),
        ("human", "Tip: Make sure to answer in the correct format"),
    ]
)
llm = ChatOpenAI(engine="gpt_3_5_turbo",temperature=0)

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


In [1920]:
def invoice_processing(json_schema,input_text):
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a world class algorithm for extracting information in structured formats."),
            ("human", "Use the given format to extract information from the following input: {input_text}"),
            ("human", "Tip: Make sure to answer in the correct format"),
        ]
    )
    llm = ChatOpenAI(engine="gpt_3_5_turbo",temperature=0)
    chain = create_structured_output_chain(json_schema , llm, prompt, verbose=True)
    output = chain.run(input_text)
    return output

In [1921]:
invoice_processing(json_schema,raw_content)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a world class algorithm for extracting information in structured formats.
Human: Use the given format to extract information from the following input: For Information Only: Do Not Pay
Leaders in Consumer Directed Care
"We Are Your Someone When Your Someone Is Not There"
Unit 1/249 Coronation Drive
Milton QLD 4064
p: 3356 2563
e: accountsreceivable@angelsinaprons.com.au
A.B.N.
89 169 762 476
Tax Invoice
Invoice No.:
00331573
Date:
29/03/2023
PO #:
Ellen Erba
Adress
PAYMENT TERMS
7 DAYS FROM
INVOICE DATE.
DATE
UNITS
NOTES
RATE
TOTAL(ex-GST)
14/03/2023
2
Hour
Name Brokerage M-F
15:30pm-17:30pm
69.00
$138.00
16/03/2023
2
Hour
Name Brokerage M-F
15:30pm-17:30pm
69.00
$138.00
18/03/2023
2
Hour
Name Brokerage Saturday
15:30pm-17:30pm
89.30
$178.60
Thank You, I hope you have been pleased with
our service, and if there is anything more we can
do please do not hesitate to call us.
Subtotal:
$454.60
G


[1m> Finished chain.[0m


{'id': '123456',
 'name_of_the_company': 'Angels in Aprons Pty Ltd',
 'address': 'Unit 1, 249 Coronation Drive, Milton Qld 4064',
 'issued_to': 'Ellen Erba',
 'total_amount': '$500.06',
 'balance': '$500.06'}

In [1922]:
llm = ChatOpenAI(engine="gpt_3_5_turbo",temperature=0)

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


In [1923]:
print(llm)

cache=None verbose=False callbacks=None callback_manager=None tags=None metadata=None client=<class 'openai.api_resources.chat_completion.ChatCompletion'> model_name='gpt-3.5-turbo' temperature=0.0 model_kwargs={'engine': 'gpt_3_5_turbo'} openai_api_key='a393d2fb4dd54af99decbb14845f7715' openai_api_base='https://azure-openai-invoice-accelarator-internal-apac.openai.azure.com/' openai_organization='' openai_proxy='' request_timeout=None max_retries=6 streaming=False n=1 max_tokens=None tiktoken_model_name=None


In [1924]:
# import os
# import openai

# openai.api_key = 'a393d2fb4dd54af99decbb14845f7715'
# openai.api_version = "2023-07-01-preview"
# openai.api_type = "azure"
# openai.api_base = 'https://azure-openai-invoice-accelarator-internal-apac.openai.azure.com/'

# messages= [
#     {"role": "user", "content": "You are a world class algorithm for extracting information in structured formats"}
# ]

# functions= [  
#     {
#     "title": "id",
#     "description": "Extract information from invoice",
#     "type": "object",
#     "properties": {
#         "id": {"title": "ID", "description": "assign a random UUID as value to id key", "type": "string"},
#         "name_of_the_company": {"title": "name_of_the_company", "description": "Identify the name of the company", "type": "string"},
#         "address": {"title": "address", "description": "Identify the address of the company", "type": "string"},
#         "issued_to": {"title": "issued_to", "description": "which company was the invoice assigned to", "type": "string"},
#         "total_amount": {"title": "total_amount", "description": "What is the total amount of the invoice", "type": "string"},
#         "balance": {"title": "balance", "description": "What is the balance amount of the invoice", "type": "string"},
#         "invoice_number": {"title": "invoice_number", "description": "what is the invoice number", "type": "string"},
#         "invoice_date": {"title": "invoice_date", "description": "what is the date the invoice issued", "type": "string"},
#         "shipment_details": {"title": "shipment_details", "description": "Please mentions the details of the shipment", "type": "string"},
#         "line_items": {"title": "line_items", "description": "what are line items mentioned and please follow the {sample_json_format} format", "type": "string"},
#         "payment_terms": {"title": "payment_terms", "description": "what is the payment terms mentioned", "type": "string"}
#     },
#     "required": ["id", "name_of_the_company","address","issued_to","total_amount","balance","invoice_number","invoice_date","shipment_details","line_items","payment_terms"],
# }
# ]  

# response = openai.ChatCompletion.create(
#     engine="gpt_3_5_turbo",
#     messages=messages,
#     functions=functions,
#     function_call="auto", 
# )

# print(response['choices'][0]['message'])

In [1925]:
chain = create_structured_output_chain(json_schema , llm, prompt, verbose=True)
output = chain.run(raw_content)




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a world class algorithm for extracting information in structured formats.
Human: Use the given format to extract information from the following input: For Information Only: Do Not Pay
Leaders in Consumer Directed Care
"We Are Your Someone When Your Someone Is Not There"
Unit 1/249 Coronation Drive
Milton QLD 4064
p: 3356 2563
e: accountsreceivable@angelsinaprons.com.au
A.B.N.
89 169 762 476
Tax Invoice
Invoice No.:
00331573
Date:
29/03/2023
PO #:
Ellen Erba
Adress
PAYMENT TERMS
7 DAYS FROM
INVOICE DATE.
DATE
UNITS
NOTES
RATE
TOTAL(ex-GST)
14/03/2023
2
Hour
Name Brokerage M-F
15:30pm-17:30pm
69.00
$138.00
16/03/2023
2
Hour
Name Brokerage M-F
15:30pm-17:30pm
69.00
$138.00
18/03/2023
2
Hour
Name Brokerage Saturday
15:30pm-17:30pm
89.30
$178.60
Thank You, I hope you have been pleased with
our service, and if there is anything more we can
do please do not hesitate to call us.
Subtotal:
$454.60
G


[1m> Finished chain.[0m


In [1958]:
output

{'id': '123456',
 'name_of_the_company': 'Leaders in Consumer Directed Care',
 'address': 'Unit 1/249 Coronation Drive, Milton QLD 4064',
 'issued_to': 'Angels in Aprons Pty Ltd',
 'total_amount': '$500.06',
 'balance': '$500.06',
 'invoice_number': '00331573',
 'invoice_date': '29/03/2023',
 'shipment_details': '',
 'line_items': '[{"date": "14/03/2023", "units": "2", "notes": "Hour", "rate": "69.00", "total": "$138.00"}, {"date": "16/03/2023", "units": "2", "notes": "Hour", "rate": "69.00", "total": "$138.00"}, {"date": "18/03/2023", "units": "2", "notes": "Hour", "rate": "89.30", "total": "$178.60"}]',
 'payment_terms': '7 DAYS FROM INVOICE DATE'}

In [1952]:
json_schema_1 = {
    "title": "id",
    "description": "Extract information from invoice",
    "type": "object",
    "properties": {
        "id": {"title": "ID", "description": "assign a UUID as value to id key", "type": "string"},
        "name_of_the_company": {"title": "name_of_the_company", "description": "Identify the name of the company", "type": "string"},
        "address": {"title": "address", "description": "Identify the address of the company", "type": "string"},
        "issued_to": {"title": "issued_to", "description": "which company was the invoice issued to", "type": "string"},
        "total_amount": {"title": "total_amount", "description": "What is the total amount of the invoice", "type": "string"},
        "balance": {"title": "balance", "description": "What is the balance amount of the invoice", "type": "string"},
    },
    "required": ["id", "name_of_the_company","address","issued_to","total_amount","balance","invoice_number","invoice_date","shipment_details","line_items","payment_terms"],
}

In [1953]:
json_schema_2 = {
    "title": "id",
    "description": "Extract information from invoice",
    "type": "object",
    "properties": {
        "invoice_number": {"title": "invoice_number", "description": "what is the invoice number", "type": "string"},
        "invoice_date": {"title": "invoice_date", "description": "what is the date the invoice issued", "type": "string"},
        "shipment_details": {"title": "shipment_details", "description": "Please extract all the shipment details", "type": "string"},
        "line_items": {"title": "line_items", "description": "what are line items mentioned and please follow the json pattern of {sample_json_format} format", "type": "string"},
        "payment_terms": {"title": "payment_terms", "description": "please mention only the payment terms mentioned in the invoice", "type": "string"}
    },
    "required": ["shipment_details","payment_terms", "line_items"],
}

In [1954]:
def process_multiple_schemas(input_text,json_schema_1,json_schema_2):
    results = []
    
    # Process using the first schema
    result_1 = invoice_processing(json_schema_1, input_text)
    results.append(result_1)
    
    # Process using the second schema
    result_2 = invoice_processing(json_schema_2, input_text)
    results.append(result_2)
    
    return results

In [1955]:
Output_bifurcated_schemas =process_multiple_schemas(raw_content,json_schema_1,json_schema_2)
final_output = {key: value for d in Output_bifurcated_schemas for key, value in d.items()}



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a world class algorithm for extracting information in structured formats.
Human: Use the given format to extract information from the following input: For Information Only: Do Not Pay
Leaders in Consumer Directed Care
"We Are Your Someone When Your Someone Is Not There"
Unit 1/249 Coronation Drive
Milton QLD 4064
p: 3356 2563
e: accountsreceivable@angelsinaprons.com.au
A.B.N.
89 169 762 476
Tax Invoice
Invoice No.:
00331573
Date:
29/03/2023
PO #:
Ellen Erba
Adress
PAYMENT TERMS
7 DAYS FROM
INVOICE DATE.
DATE
UNITS
NOTES
RATE
TOTAL(ex-GST)
14/03/2023
2
Hour
Name Brokerage M-F
15:30pm-17:30pm
69.00
$138.00
16/03/2023
2
Hour
Name Brokerage M-F
15:30pm-17:30pm
69.00
$138.00
18/03/2023
2
Hour
Name Brokerage Saturday
15:30pm-17:30pm
89.30
$178.60
Thank You, I hope you have been pleased with
our service, and if there is anything more we can
do please do not hesitate to call us.
Subtotal:
$454.60
G

In [1957]:
Output_bifurcated_schemas

[{'id': '1',
  'name_of_the_company': 'Leaders in Consumer Directed Care',
  'address': 'Unit 1/249 Coronation Drive, Milton QLD 4064',
  'issued_to': 'Ellen Erba',
  'total_amount': '$500.06',
  'balance': '$500.06'},
 {'invoice_number': '00331573',
  'invoice_date': '29/03/2023',
  'shipment_details': 'SHIP TO:\nName\nAddress\n\nSOLD TO:\nCAPITAL GUARDIANS\n10/585 LITTLE COLLINS ST\nMELBOURNE VIC 3000',
  'line_items': 'STOCK CODE\nDESCRIPTION\nSUPPLIED\nQTY\nBACK ORDER\nQTY\nPRICE\nGST\nNET\n15230002\nPOISE LINERS REGULAR FEMALE 50ml 19-25cm\nWHITE PKT X 26 91853\n3.00\n0.00\n4.41\n0.00\n13.23',
  'payment_terms': '7 DAYS FROM\nINVOICE DATE.'}]

In [1932]:
final_output

{'id': '123456',
 'name_of_the_company': 'Angels in Aprons Pty Ltd',
 'address': 'Unit 1, 249 Coronation Drive, Milton Qld 4064',
 'issued_to': 'Ellen Erba',
 'total_amount': '$500.06',
 'balance': '$500.06',
 'invoice_number': '00331573',
 'invoice_date': '29/03/2023',
 'shipment_details': 'SHIP TO:\nName\nAddress\nSOLD TO:\nCAPITAL GUARDIANS\n10/585 LITTLE COLLINS ST\nMELBOURNE VIC 3000',
 'line_items': 'STOCK CODE\nDESCRIPTION\nSUPPLIED\nQTY\nBACK ORDER\nQTY\nPRICE\nGST\nNET\n15230002\nPOISE LINERS REGULAR FEMALE 50ml 19-25cm\nWHITE PKT X 26 91853\n3.00\n0.00\n4.41\n0.00\n13.23',
 'payment_terms': '7 DAYS FROM\nINVOICE DATE.'}

In [1933]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a world class algorithm for extracting information in structured formats."),
        ("human", "Use the given format to extract information from the following input: {input}"),
        ("human", "Tip: Make sure to answer in the correct format"),
    ]
)

In [1934]:
llm = ChatOpenAI(engine="gpt_3_5_turbo",temperature=0)

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


In [1935]:
from langchain.agents.self_ask_with_search.output_parser import SelfAskOutputParser
chain = create_structured_output_chain(json_schema_2 , llm, prompt, verbose=True)
chain.run(raw_content)
# output_parser = SelfAskOutputParser()
# parsed_output = output_parser.parse(output)
# print(parsed_output)




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a world class algorithm for extracting information in structured formats.
Human: Use the given format to extract information from the following input: For Information Only: Do Not Pay
Leaders in Consumer Directed Care
"We Are Your Someone When Your Someone Is Not There"
Unit 1/249 Coronation Drive
Milton QLD 4064
p: 3356 2563
e: accountsreceivable@angelsinaprons.com.au
A.B.N.
89 169 762 476
Tax Invoice
Invoice No.:
00331573
Date:
29/03/2023
PO #:
Ellen Erba
Adress
PAYMENT TERMS
7 DAYS FROM
INVOICE DATE.
DATE
UNITS
NOTES
RATE
TOTAL(ex-GST)
14/03/2023
2
Hour
Name Brokerage M-F
15:30pm-17:30pm
69.00
$138.00
16/03/2023
2
Hour
Name Brokerage M-F
15:30pm-17:30pm
69.00
$138.00
18/03/2023
2
Hour
Name Brokerage Saturday
15:30pm-17:30pm
89.30
$178.60
Thank You, I hope you have been pleased with
our service, and if there is anything more we can
do please do not hesitate to call us.
Subtotal:
$454.60
G


[1m> Finished chain.[0m


{'invoice_number': '00331573',
 'invoice_date': '29/03/2023',
 'shipment_details': 'SHIP TO:\nName\nAddress\nSOLD TO:\nCAPITAL GUARDIANS\n10/585 LITTLE COLLINS ST\nMELBOURNE VIC 3000',
 'line_items': 'STOCK CODE\nDESCRIPTION\nSUPPLIED\nQTY\nBACK ORDER\nQTY\nPRICE\nGST\nNET\n15230002\nPOISE LINERS REGULAR FEMALE 50ml 19-25cm\nWHITE PKT X 26 91853\n3.00\n0.00\n4.41\n0.00\n13.23',
 'payment_terms': '7 DAYS FROM\nINVOICE DATE.'}

# Normal Evaluation operation of the llm output generated

In [1936]:
template = '''
System: You are an AI assistant. You will be given the definition of an evaluation metric for assessing the quality of an answer in a question-answering task. Your job is to compute an accurate evaluation score using the provided evaluation metric.

User:
Relevance measures how well the answer addresses the main aspects of the question, based on the context. Consider whether all and only the important aspects are contained in the answer when evaluating relevance. Given the context and question, score the relevance of the answer between one to five stars using the following rating scale:
One star: the answer completely lacks relevance
Two stars: the answer is mostly relevant
Three stars: the answer has perfect relevance

This rating value should always be an integer between 1 and 3. So the rating produced should be 1 or 2 or 3

context: TAX INVOICE\nIntegrated Living\nPO BOX 2567\nDANGAR NSW 2309\nAUSTRALIA\nInvoice Date\n1 Aug 2022\nInvoice Number\nINV-0716\nReference\nJanet Sissions\nABN\n41 539 697 361\nInside Out Property And\nGarden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nItem\nDescription\nQuantity\nUnit Price\nGST\nAmount AUD\nCASE\nCase Manager\nSusan Swanson\n1.00\n0.00\n0.00\nCLIENT\nClient Name and Address\nJanet Sissions\n11/287-289 Charlestown RD\nCharlestown\n1.00\n0.00\n0.00\nMOULD\nMould Removal\nAttend clients home remove on main\nbedroom lounge room and spare bed room.\nJob completed 29.07.2022\n1.00\n120.00\n10%\n120.00\nSubtotal\n120.00\nTOTAL GST 10%\n12.00\nTOTAL AUD\n132.00\nDue Date: 15 Aug 2022\nInvoices payable within 14 days to the following bank account;\nInside Out Property And Garden Maintenance\nBSB 650 000\nACC 525 656 002\nInside Out\nPROPERTY &\nGARDEN\nMaintenance\n-X ---\nPAYMENT ADVICE\nTo:\nInside Out Property And Garden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nCustomer\nIntegrated Living\nInvoice Number\nINV-0716\nAmount Due\n132.00\nDue Date\n15 Aug 2022\nAmount Enclosed\nEnter the amount you are paying above\n

question: what is the name_of_the_company?
answer: All 1 Building Maintenance Pty Ltd

question: Identify the address of the company?
answer: PO Box 4078\nGoonellabah NSW 2480

question: which company was the invoice issued to?
answer: BGIS Pty Ltd

question: What is the total amount of the invoice?
answer: A$1,592.82

question: What is the balance amount of the invoice
answer: A$1,592.82

stars: 3

context: TAX INVOICE\nIntegrated Living\nPO BOX 2567\nDANGAR NSW 2309\nAUSTRALIA\nInvoice Date\n1 Aug 2022\nInvoice Number\nINV-0716\nReference\nJanet Sissions\nABN\n41 539 697 361\nInside Out Property And\nGarden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nItem\nDescription\nQuantity\nUnit Price\nGST\nAmount AUD\nCASE\nCase Manager\nSusan Swanson\n1.00\n0.00\n0.00\nCLIENT\nClient Name and Address\nJanet Sissions\n11/287-289 Charlestown RD\nCharlestown\n1.00\n0.00\n0.00\nMOULD\nMould Removal\nAttend clients home remove on main\nbedroom lounge room and spare bed room.\nJob completed 29.07.2022\n1.00\n120.00\n10%\n120.00\nSubtotal\n120.00\nTOTAL GST 10%\n12.00\nTOTAL AUD\n132.00\nDue Date: 15 Aug 2022\nInvoices payable within 14 days to the following bank account;\nInside Out Property And Garden Maintenance\nBSB 650 000\nACC 525 656 002\nInside Out\nPROPERTY &\nGARDEN\nMaintenance\n-X ---\nPAYMENT ADVICE\nTo:\nInside Out Property And Garden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nCustomer\nIntegrated Living\nInvoice Number\nINV-0716\nAmount Due\n132.00\nDue Date\n15 Aug 2022\nAmount Enclosed\nEnter the amount you are paying above\n

question: What is the total amount of the invoice?
answer:  A$4,290.00

question: What is the balance amount of the invoice
answer: A$1,592.82

question: What is the invoice number?
answer: 1457

question: what is the date the invoice issued?
answer: 02/02/2023

question: Please extract all the shipment details
answer: " "


question: please mention only the payment terms mentioned in the invoice?
answer: Net 30

stars: 2

context: TAX INVOICE\nIntegrated Living\nPO BOX 2567\nDANGAR NSW 2309\nAUSTRALIA\nInvoice Date\n1 Aug 2022\nInvoice Number\nINV-0716\nReference\nJanet Sissions\nABN\n41 539 697 361\nInside Out Property And\nGarden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nItem\nDescription\nQuantity\nUnit Price\nGST\nAmount AUD\nCASE\nCase Manager\nSusan Swanson\n1.00\n0.00\n0.00\nCLIENT\nClient Name and Address\nJanet Sissions\n11/287-289 Charlestown RD\nCharlestown\n1.00\n0.00\n0.00\nMOULD\nMould Removal\nAttend clients home remove on main\nbedroom lounge room and spare bed room.\nJob completed 29.07.2022\n1.00\n120.00\n10%\n120.00\nSubtotal\n120.00\nTOTAL GST 10%\n12.00\nTOTAL AUD\n132.00\nDue Date: 15 Aug 2022\nInvoices payable within 14 days to the following bank account;\nInside Out Property And Garden Maintenance\nBSB 650 000\nACC 525 656 002\nInside Out\nPROPERTY &\nGARDEN\nMaintenance\n-X ---\nPAYMENT ADVICE\nTo:\nInside Out Property And Garden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nCustomer\nIntegrated Living\nInvoice Number\nINV-0716\nAmount Due\n132.00\nDue Date\n15 Aug 2022\nAmount Enclosed\nEnter the amount you are paying above\n
question: what is the name_of_the_company?
answer: Moonlighting Connects Pty Ltd

question: Identify the address of the company?
answer: 394 Flinders Parade, Brighton, Q. 4017

question: which company was the invoice issued to?
answer: Integrated Living Australia Ltd

question: What is the total amount of the invoice?
answer: $4,290.00

question: What is the balance amount of the invoice
answer: $4,290.00
stars: 1

context: {context}

Use the context to score the stars for inputs
{inputs}
stars:
'''

In [1937]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
import json
from langchain.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)

In [1938]:
# dummy data for testing 
synthetic_data = {'id': '123456',
 'name_of_the_company': 'crayon Pty Ltd',
 'address': '1788 dandenong road clayton-3168',
 'issued_to': 'Maximum life PVT LTD',
 'total_amount': '$1000',
 'balance': '$1000',
 'invoice_number': '8540',
 'invoice_date': '31/09/2022',
 'shipment_details': '',
 'payment_terms': 'Net 40'}

In [1939]:
# formating questions from json schema and answers from the output.
descriptions = {key: value.get("description") for key, value in json_schema["properties"].items()}

# Format question-answer pairs
formatted_pairs = []
for key in descriptions:
    question = descriptions[key]
    answer = final_output.get(key)
    formatted_pairs.append(f"question: {question}\nanswer: {answer}\n")

# Print formatted pairs
for pair in formatted_pairs:
    print(pair)

question: assign a random UUID as value to id key
answer: 123456

question: Identify the name of the company
answer: Angels in Aprons Pty Ltd

question: Identify the address of the company
answer: Unit 1, 249 Coronation Drive, Milton Qld 4064

question: which company was the invoice issued to
answer: Ellen Erba

question: What is the total amount of the invoice
answer: $500.06

question: What is the balance amount of the invoice
answer: $500.06

question: what is the invoice number
answer: 00331573

question: what is the date the invoice issued
answer: 29/03/2023

question: Please mentions the details of the shipment
answer: SHIP TO:
Name
Address
SOLD TO:
CAPITAL GUARDIANS
10/585 LITTLE COLLINS ST
MELBOURNE VIC 3000

question: what are line items mentioned and please follow the {sample_json_format} format
answer: STOCK CODE
DESCRIPTION
SUPPLIED
QTY
BACK ORDER
QTY
PRICE
GST
NET
15230002
POISE LINERS REGULAR FEMALE 50ml 19-25cm
WHITE PKT X 26 91853
3.00
0.00
4.41
0.00
13.23

question: wh

In [1940]:
final_output

{'id': '123456',
 'name_of_the_company': 'Angels in Aprons Pty Ltd',
 'address': 'Unit 1, 249 Coronation Drive, Milton Qld 4064',
 'issued_to': 'Ellen Erba',
 'total_amount': '$500.06',
 'balance': '$500.06',
 'invoice_number': '00331573',
 'invoice_date': '29/03/2023',
 'shipment_details': 'SHIP TO:\nName\nAddress\nSOLD TO:\nCAPITAL GUARDIANS\n10/585 LITTLE COLLINS ST\nMELBOURNE VIC 3000',
 'line_items': 'STOCK CODE\nDESCRIPTION\nSUPPLIED\nQTY\nBACK ORDER\nQTY\nPRICE\nGST\nNET\n15230002\nPOISE LINERS REGULAR FEMALE 50ml 19-25cm\nWHITE PKT X 26 91853\n3.00\n0.00\n4.41\n0.00\n13.23',
 'payment_terms': '7 DAYS FROM\nINVOICE DATE.'}

In [1941]:
# gpt3_5_prompt = PromptTemplate(
#                 template=template,
#                 input_variables=["context","inputs"]
#             )

# system_message_prompt = SystemMessagePromptTemplate(prompt=gpt3_5_prompt)

# chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
# messages = chat_prompt.format_prompt(context=raw_content,inputs = formatted_pairs).to_messages()
# chat = ChatOpenAI(engine="gpt_3_5_turbo",temperature=0)
# resp = chat(messages)
# print(resp)
# # gpt4_json_output_before = json.loads(resp.content)

# Dynamic Evaluation method

In [1942]:
formatted_pairs

['question: assign a random UUID as value to id key\nanswer: 123456\n',
 'question: Identify the name of the company\nanswer: Angels in Aprons Pty Ltd\n',
 'question: Identify the address of the company\nanswer: Unit 1, 249 Coronation Drive, Milton Qld 4064\n',
 'question: which company was the invoice issued to\nanswer: Ellen Erba\n',
 'question: What is the total amount of the invoice\nanswer: $500.06\n',
 'question: What is the balance amount of the invoice\nanswer: $500.06\n',
 'question: what is the invoice number\nanswer: 00331573\n',
 'question: what is the date the invoice issued\nanswer: 29/03/2023\n',
 'question: Please mentions the details of the shipment\nanswer: SHIP TO:\nName\nAddress\nSOLD TO:\nCAPITAL GUARDIANS\n10/585 LITTLE COLLINS ST\nMELBOURNE VIC 3000\n',
 'question: what are line items mentioned and please follow the {sample_json_format} format\nanswer: STOCK CODE\nDESCRIPTION\nSUPPLIED\nQTY\nBACK ORDER\nQTY\nPRICE\nGST\nNET\n15230002\nPOISE LINERS REGULAR FEMALE 

In [1959]:
template_1 = '''
System: You are an AI assistant. You will be given the definition of an evaluation metric for assessing the quality of an answer in a question-answering task. Your job is to compute an accurate evaluation score using the provided evaluation metric.

User:
Relevance measures how well the answer addresses the main aspects of the question, based on the context. Consider whether all and only the important aspects are contained in the answer when evaluating relevance. Given the context and question, score the relevance of the answer between zero to one using the following rating scale:
Zero stars : the answer is wrong or incomplete
One stars : the answer is completely correct

This rating value should always be an integer between 0 and 1. So the rating produced should be 0 or 1

Example 1:

context: TAX INVOICE\nIntegrated Living\nPO BOX 2567\nDANGAR NSW 2309\nAUSTRALIA\nInvoice Date\n1 Aug 2022\nInvoice Number\nINV-0716\nReference\nJanet Sissions\nABN\n41 539 697 361\nInside Out Property And\nGarden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nItem\nDescription\nQuantity\nUnit Price\nGST\nAmount AUD\nCASE\nCase Manager\nSusan Swanson\n1.00\n0.00\n0.00\nCLIENT\nClient Name and Address\nJanet Sissions\n11/287-289 Charlestown RD\nCharlestown\n1.00\n0.00\n0.00\nMOULD\nMould Removal\nAttend clients home remove on main\nbedroom lounge room and spare bed room.\nJob completed 29.07.2022\n1.00\n120.00\n10%\n120.00\nSubtotal\n120.00\nTOTAL GST 10%\n12.00\nTOTAL AUD\n132.00\nDue Date: 15 Aug 2022\nInvoices payable within 14 days to the following bank account;\nInside Out Property And Garden Maintenance\nBSB 650 000\nACC 525 656 002\nInside Out\nPROPERTY &\nGARDEN\nMaintenance\n-X ---\nPAYMENT ADVICE\nTo:\nInside Out Property And Garden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nCustomer\nIntegrated Living\nInvoice Number\nINV-0716\nAmount Due\n132.00\nDue Date\n15 Aug 2022\nAmount Enclosed\nEnter the amount you are paying above\n

question: Identify the name of the company
answer: Moonlighting Connects Pty Ltd

stars : 1

Example 2:

context: TAX INVOICE\nIntegrated Living\nPO BOX 2567\nDANGAR NSW 2309\nAUSTRALIA\nInvoice Date\n1 Aug 2022\nInvoice Number\nINV-0716\nReference\nJanet Sissions\nABN\n41 539 697 361\nInside Out Property And\nGarden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nItem\nDescription\nQuantity\nUnit Price\nGST\nAmount AUD\nCASE\nCase Manager\nSusan Swanson\n1.00\n0.00\n0.00\nCLIENT\nClient Name and Address\nJanet Sissions\n11/287-289 Charlestown RD\nCharlestown\n1.00\n0.00\n0.00\nMOULD\nMould Removal\nAttend clients home remove on main\nbedroom lounge room and spare bed room.\nJob completed 29.07.2022\n1.00\n120.00\n10%\n120.00\nSubtotal\n120.00\nTOTAL GST 10%\n12.00\nTOTAL AUD\n132.00\nDue Date: 15 Aug 2022\nInvoices payable within 14 days to the following bank account;\nInside Out Property And Garden Maintenance\nBSB 650 000\nACC 525 656 002\nInside Out\nPROPERTY &\nGARDEN\nMaintenance\n-X ---\nPAYMENT ADVICE\nTo:\nInside Out Property And Garden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nCustomer\nIntegrated Living\nInvoice Number\nINV-0716\nAmount Due\n132.00\nDue Date\n15 Aug 2022\nAmount Enclosed\nEnter the amount you are paying above\n
question: Identify the name of the company
answer: Sunlight Connects Pty Ltd

stars : 0

Example 3:

context: TAX INVOICE\nIntegrated Living\nPO BOX 2567\nDANGAR NSW 2309\nAUSTRALIA\nInvoice Date\n1 Aug 2022\nInvoice Number\nINV-0716\nReference\nJanet Sissions\nABN\n41 539 697 361\nInside Out Property And\nGarden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nItem\nDescription\nQuantity\nUnit Price\nGST\nAmount AUD\nCASE\nCase Manager\nSusan Swanson\n1.00\n0.00\n0.00\nCLIENT\nClient Name and Address\nJanet Sissions\n11/287-289 Charlestown RD\nCharlestown\n1.00\n0.00\n0.00\nMOULD\nMould Removal\nAttend clients home remove on main\nbedroom lounge room and spare bed room.\nJob completed 29.07.2022\n1.00\n120.00\n10%\n120.00\nSubtotal\n120.00\nTOTAL GST 10%\n12.00\nTOTAL AUD\n132.00\nDue Date: 15 Aug 2022\nInvoices payable within 14 days to the following bank account;\nInside Out Property And Garden Maintenance\nBSB 650 000\nACC 525 656 002\nInside Out\nPROPERTY &\nGARDEN\nMaintenance\n-X ---\nPAYMENT ADVICE\nTo:\nInside Out Property And Garden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nCustomer\nIntegrated Living\nInvoice Number\nINV-0716\nAmount Due\n132.00\nDue Date\n15 Aug 2022\nAmount Enclosed\nEnter the amount you are paying above\n
question: what is the invoice number
answer: 8540

stars : 0

Example 4:

context: TAX INVOICE\nIntegrated Living\nPO BOX 2567\nDANGAR NSW 2309\nAUSTRALIA\nInvoice Date\n1 Aug 2022\nInvoice Number\nINV-0716\nReference\nJanet Sissions\nABN\n41 539 697 361\nInside Out Property And\nGarden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nItem\nDescription\nQuantity\nUnit Price\nGST\nAmount AUD\nCASE\nCase Manager\nSusan Swanson\n1.00\n0.00\n0.00\nCLIENT\nClient Name and Address\nJanet Sissions\n11/287-289 Charlestown RD\nCharlestown\n1.00\n0.00\n0.00\nMOULD\nMould Removal\nAttend clients home remove on main\nbedroom lounge room and spare bed room.\nJob completed 29.07.2022\n1.00\n120.00\n10%\n120.00\nSubtotal\n120.00\nTOTAL GST 10%\n12.00\nTOTAL AUD\n132.00\nDue Date: 15 Aug 2022\nInvoices payable within 14 days to the following bank account;\nInside Out Property And Garden Maintenance\nBSB 650 000\nACC 525 656 002\nInside Out\nPROPERTY &\nGARDEN\nMaintenance\n-X ---\nPAYMENT ADVICE\nTo:\nInside Out Property And Garden Maintenance\n65 Kula Rd\nMEDOWIE NSW 2318\nAUSTRALIA\nCustomer\nIntegrated Living\nInvoice Number\nINV-0716\nAmount Due\n132.00\nDue Date\n15 Aug 2022\nAmount Enclosed\nEnter the amount you are paying above\n
question: what is the invoice number
answer: 1769

stars : 1





context: {context}

Use the context to score the stars for inputs
{inputs}
stars:
'''

In [1962]:
gpt3_5_prompt = PromptTemplate(
                template=template_1,
                input_variables=["context","inputs"]
            )

system_message_prompt = SystemMessagePromptTemplate(prompt=gpt3_5_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
scoring = []
for i in formatted_pairs:
    # print(i)
    messages = chat_prompt.format_prompt(context=raw_content,inputs = i).to_messages()
    chat = ChatOpenAI(engine="gpt_3_5_turbo",temperature=0)
    resp = chat(messages)
    print(i)
    print(resp)
    scoring.append(resp.content)
    # scoring.append(resp)
# gpt4_json_output_before = json.loads(resp.content)

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.
                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: assign a random UUID as value to id key
answer: 123456

content='0' additional_kwargs={} example=False


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: Identify the name of the company
answer: Angels in Aprons Pty Ltd

content='1' additional_kwargs={} example=False


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: Identify the address of the company
answer: Unit 1, 249 Coronation Drive, Milton Qld 4064

content='1' additional_kwargs={} example=False


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: which company was the invoice issued to
answer: Ellen Erba

content='1' additional_kwargs={} example=False


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: What is the total amount of the invoice
answer: $500.06

content='1' additional_kwargs={} example=False


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: What is the balance amount of the invoice
answer: $500.06

content='1' additional_kwargs={} example=False


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: what is the invoice number
answer: 00331573

content='1' additional_kwargs={} example=False


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: what is the date the invoice issued
answer: 29/03/2023

content='1' additional_kwargs={} example=False


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: Please mentions the details of the shipment
answer: SHIP TO:
Name
Address
SOLD TO:
CAPITAL GUARDIANS
10/585 LITTLE COLLINS ST
MELBOURNE VIC 3000

content='1' additional_kwargs={} example=False


                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


question: what are line items mentioned and please follow the {sample_json_format} format
answer: STOCK CODE
DESCRIPTION
SUPPLIED
QTY
BACK ORDER
QTY
PRICE
GST
NET
15230002
POISE LINERS REGULAR FEMALE 50ml 19-25cm
WHITE PKT X 26 91853
3.00
0.00
4.41
0.00
13.23

content='1' additional_kwargs={} example=False
question: what is the payment terms mentioned
answer: 7 DAYS FROM
INVOICE DATE.

content='1' additional_kwargs={} example=False


In [1963]:
scoring

['0', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1']