In [None]:
from main import AwsInvoiceCredit
from langchain.output_parsers import PydanticOutputParser

# Build an output parser bound to the AwsInvoiceCredit model imported from main.
parser = PydanticOutputParser(pydantic_object=AwsInvoiceCredit)
# Print the format-instructions text the parser generates for that model.
print(parser.get_format_instructions())

In [None]:
import os
from main import remove_footer
from langchain_community.document_loaders import PyMuPDFLoader

def read_invoice(invoice_document: str) -> str:
    """Load an invoice PDF and return its text prefixed with a metadata header.

    The text of the first document returned by ``PyMuPDFLoader`` is passed
    through ``remove_footer`` and prefixed with two header lines: the file
    name and the DoiT payer id. The payer id is the second ``_``-separated
    token of the name of the folder that contains the file
    (e.g. ``.../acme_12345/invoice.pdf`` -> ``12345``).

    Args:
        invoice_document: Path to the invoice PDF. Relative paths are resolved
            against the current working directory before the containing
            folder's name is read.

    Returns:
        The two header lines followed by the footer-stripped invoice text.

    Raises:
        IndexError: If the loader returns no documents, or if the containing
            folder's name has no second ``_``-separated token.
    """
    pages = PyMuPDFLoader(invoice_document).load()
    if not pages:
        raise IndexError(f"No pages could be loaded from {invoice_document!r}")
    # NOTE(review): only the first loaded document is used (presumably page 1);
    # anything after it is ignored - confirm this is intended for multi-page invoices.
    invoice = remove_footer(pages[0].page_content)

    # Resolve to an absolute path first: for inputs such as "invoice.pdf" or
    # "./invoice.pdf", os.path.dirname() yields "" or ".", which hides the real
    # name of the containing folder.
    absolute_path = os.path.abspath(invoice_document)
    file_name = os.path.basename(absolute_path)
    parent_folder = os.path.basename(os.path.dirname(absolute_path))

    # The payer id is the second "_"-separated token of the folder name.
    tokens = parent_folder.split("_")
    if len(tokens) < 2:
        raise IndexError(
            f"Cannot extract DoiT payer id: folder name {parent_folder!r} "
            "has no '_'-separated second token"
        )
    payer_id = tokens[1]

    # Prepend the metadata header to the invoice text.
    return f"File name: {file_name}\nDoiT payer id: {payer_id}\n" + invoice
  
# Ask for the invoice location; whitespace around a pasted path is dropped.
file_name = input("Enter invoice file path: ").strip()
# Show the prepared invoice text (metadata header + footer-stripped content).
print(read_invoice(file_name))

In [None]:
import asyncio
from langchain_openai import ChatOpenAI
from main import extract_data

async def process_invoice(file_path):
    """Extract structured data from one invoice and print the outcome.

    The invoice text is obtained from ``read_invoice`` and handed, together
    with a ``gpt-4o-mini`` chat model, to ``extract_data``. What gets printed
    depends on what ``extract_data`` yields:

    * an ``Exception`` instance -> an error message,
    * an ``AwsInvoiceCredit`` -> its JSON dump,
    * anything else -> a message naming the unexpected type.

    Exceptions raised during the extraction step are caught and printed.
    Errors from reading the invoice or constructing the model are not caught
    here and propagate to the caller. The function returns ``None``.
    """
    document_text = read_invoice(file_path)

    # Model configuration, kept in one place for readability.
    llm_settings = {
        "model": "gpt-4o-mini",
        "openai_api_key": os.getenv("OPENAI_API_KEY"),
        "temperature": 0.0,
        "max_tokens": 16384,
        "top_p": 0.0,
    }
    llm = ChatOpenAI(**llm_settings)

    try:
        # A fresh single-slot semaphore is created for every call.
        gate = asyncio.Semaphore(1)
        result = await extract_data(llm, document=document_text, sem=gate)

        if isinstance(result, Exception):
            # extract_data may hand back an exception object instead of raising it.
            message = f"Error processing invoice: {result}"
        elif isinstance(result, AwsInvoiceCredit):
            # Parsed successfully: emit the data as a JSON object.
            message = result.model_dump_json()
        else:
            message = f"Unexpected result type: {type(result)}"
        print(message)
    except Exception as exc:
        print(f"Unexpected error processing invoice: {exc}")

# Prompt for the path of the invoice PDF to process.
file_path = input("Enter invoice file path: ")
# Top-level `await` is not valid in a plain Python script; this line relies on
# the notebook (IPython) kernel running awaitables at cell level.
await process_invoice(file_path)