In [None]:
from __future__ import annotations
from pydantic import BaseModel
from openai import OpenAI
import sys
sys.path.append('../../../')

import os
from dotenv import dotenv_values
from modules.app_settings import AppSettings

In [None]:
# Load application settings from the .env file located three directory levels
# above the current working directory.
working_dir = os.path.abspath('../../../')
env_values = dotenv_values(f"{working_dir}/.env")
settings = AppSettings(env_values)

In [None]:
# Location of the invoice Markdown document, relative to the current working
# directory.
markdown_path = '../../Assets/Invoices/'
markdown_file_name = 'Invoice-Markdown.md'

# Read the whole document into memory. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open until garbage
# collection. The explicit encoding keeps decoding independent of the platform
# default (assumes the file is stored as UTF-8 -- TODO confirm).
with open(f"{markdown_path}{markdown_file_name}", "r", encoding="utf-8") as markdown_file:
    markdown_content = markdown_file.read()

print(markdown_content)

In [None]:
# Create the OpenAI API client, authenticating with the key held in settings.
client = OpenAI(
    api_key=settings.openai_api_key,
)

In [None]:
# NOTE: these models are documented with comments rather than docstrings on
# purpose -- pydantic copies a model's docstring into the generated JSON schema
# as its "description", which would change the schema passed as response_format.

# A single product line of an invoice.
# Defined before InvoiceData so that the `list[InvoiceProduct]` annotation
# there names a class that already exists, rather than relying on deferred
# forward-reference resolution.
class InvoiceProduct(BaseModel):
    id: str
    description: str | None  # nullable
    unit_price: float
    quantity: float
    total_price: float
    reason: str | None  # nullable


# Top-level structure extracted from an invoice; used as the response_format
# of the completion request.
# NOTE(review): the extraction prompt asks for null when a value is missing,
# but every field here except `description` and `reason` is non-nullable --
# confirm whether more fields should accept None.
class InvoiceData(BaseModel):
    invoice_number: str
    # NOTE(review): "purhcase" looks like a typo for "purchase". Kept as-is
    # because renaming the field changes the key in the schema and in the
    # output JSON.
    purhcase_order_number: str
    customer_name: str
    customer_address: str
    delivery_date: str
    payable_by: str
    total_product_quantity: float
    total_product_price: float
    products: list[InvoiceProduct]

In [None]:
# Prompts for the extraction request: a system message describing the
# assistant's role, and a user instruction that precedes the invoice text.
system_prompt = "You are an AI assistant that extracts data from documents and returns them as structured JSON objects. Do not return as a code block."
extraction_instruction = "Extract the data from this invoice. If a value is not present, provide null."

# Send the instruction and the invoice Markdown as two separate user messages,
# passing InvoiceData as the response_format for the reply.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": extraction_instruction},
        {"role": "user", "content": markdown_content},
    ],
    response_format=InvoiceData,
)

In [None]:
# Take the message of the first (and only requested) choice.
message = completion.choices[0].message

# `parsed` is None when the model refuses the request; in that case the reason
# is carried in `refusal`. Fail with that text instead of an opaque
# AttributeError on the `.json()` call below.
if message.parsed is None:
    raise ValueError(f"The model did not return a parsed invoice: {message.refusal}")

invoice = message.parsed

# NOTE(review): `.json()` is deprecated in pydantic v2 in favour of
# `.model_dump_json()`; kept because the installed pydantic version is not
# visible from this notebook.
print(invoice.json())