In [None]:
import sys
sys.path.append('../../../')

import os
from dotenv import dotenv_values
from openai import AzureOpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from modules.app_settings import AppSettings
from modules.invoice import InvoiceData, InvoiceDataProduct
from modules.medical import MedicalRecord, MedicalRecordPatient, MedicalRecordReferral

from transformers import AutoProcessor, AutoModelForCausalLM  
from PIL import Image
import requests
import copy
%matplotlib inline 

In [None]:
# Load application settings from the .env file located three directories above the working directory
working_dir = os.path.abspath('../../../')
env_values = dotenv_values(f"{working_dir}/.env")
settings = AppSettings(env_values)

In [None]:
# Configure the Azure OpenAI client to authenticate with bearer tokens
# obtained through DefaultAzureCredential (no API key is used here)
azure_credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(
    azure_credential,
    "https://cognitiveservices.azure.com/.default",
)

openai_client = AzureOpenAI(
    azure_endpoint=settings.completions_openai_endpoint,
    azure_ad_token_provider=token_provider,
    api_version="2024-02-01",
)

In [None]:
# Load the Florence-2-large processor and model from Hugging Face
model_id = 'microsoft/Florence-2-large'
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
# Switch to evaluation mode and move the weights to the GPU
model = model.eval().cuda()

In [None]:
# Function to run a prompt with the Florence-2-large model
def run_example(task_prompt, image, text_input=None):
    """Run a single task prompt against an image and return the parsed answer.

    Uses the notebook-level ``processor`` and ``model`` objects.

    Args:
        task_prompt: Task token (for example '<OCR>'). It is used both as the
            start of the prompt and as the ``task`` given to post-processing.
        image: Image to analyse (e.g. a PIL image); must expose ``width`` and
            ``height``.
        text_input: Optional text appended directly after ``task_prompt``.
            When ``None`` the prompt is ``task_prompt`` alone.

    Returns:
        The value returned by ``processor.post_process_generation`` for the
        first decoded sequence.
    """
    prompt = task_prompt if text_input is None else task_prompt + text_input
    inputs = processor(text=prompt, images=image, return_tensors="pt")

    # Move the inputs to whichever device the model lives on instead of
    # hard-coding CUDA; identical to the previous behaviour for a CUDA model.
    device = model.device

    generated_ids = model.generate(
        input_ids=inputs["input_ids"].to(device),
        pixel_values=inputs["pixel_values"].to(device),
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,  # deterministic decoding (beam search, no sampling)
        num_beams=3,
    )

    # Special tokens are kept in the decoded text that is handed to post-processing
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]

    return processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )


In [None]:
# Setup the PDF image file
pdf_image_file_name = 'Invoice-Complex-Scanned.pdf_0.png'
pdf_image_file_path = os.path.join(os.path.abspath('./'), pdf_image_file_name)
# Output folder is named after the part of the file name before its first '.'
subfolder_path = os.path.join(os.path.abspath('./'), pdf_image_file_name.split('.')[0])

# exist_ok=True keeps the cell safe to re-run and avoids a check-then-create race
os.makedirs(subfolder_path, exist_ok=True)

# Expected values that the extracted record is compared against later in the notebook
expected = InvoiceData(
    invoice_number='3847193',
    purchase_order_number='15931',
    customer_name='Sharp Consulting',
    customer_address='73 Regal Way, Leeds, LS1 5AB, UK',
    delivery_date='2024-05-16',
    payable_by='2024-05-24',
    products=[
        InvoiceDataProduct(
            id='MA197',
            description='STRETCHWRAP ROLL',
            unit_price=16.62,
            quantity=5,
            total=83.10
        ),
        InvoiceDataProduct(
            id='ST4086',
            description='BALLPOINT PEN MED.',
            unit_price=2.49,
            quantity=10,
            total=24.90
        ),
        InvoiceDataProduct(
            id='JF9912413BF',
            description='BUBBLE FILM ROLL CL.',
            unit_price=15.46,
            quantity=12,
            total=185.52
        ),
    ],
    returns=[],
    total_product_quantity=27,
    total_product_price=293.52
)

# Open the image from the path built above; the previous `filePath` name was
# never defined and raised NameError on a fresh kernel
image = Image.open(pdf_image_file_path)
image

In [None]:
# Run the OCR task on the invoice image.
# Alternative task prompt that can be swapped in: '<OCR_WITH_REGION>'
task_prompt = '<OCR>'
results = run_example(task_prompt=task_prompt, image=image)
print(results)

In [None]:
# Take the entry stored under the task prompt key in the results returned by run_example;
# it is sent to the chat completion as the document text in a later cell
full_text = results[task_prompt]

In [None]:
# Ask the Azure OpenAI deployment to map the OCR text onto the InvoiceData JSON structure
json_structure = InvoiceData.empty_json_str()

# Instructions that define the assistant's role and output format
system_message = {
    "role": "system",
    "content": "You are an AI assistant that extracts data from documents and returns them as structured JSON objects. Do not return as a code block.",
}
# Extraction request, including the empty JSON structure to fill in
instruction_message = {
    "role": "user",
    "content": "Extract the data from this invoice. If a value is not present, provide null. Use the following structure: " + json_structure,
}
# The OCR output itself
document_message = {
    "role": "user",
    "content": full_text,
}

completion = openai_client.chat.completions.create(
    model=settings.completions_openai_completion_model_deployment,
    messages=[system_message, instruction_message, document_message],
)

In [None]:
# Parse the completion into an InvoiceData record and score it against the expected values
response_json_str = completion.choices[0].message.content
record = InvoiceData.from_json_str(response_json_str)

record_json = record.to_json()
accuracy = expected.compare_accuracy(record)
overall_accuracy_percent = accuracy['overall'] * 100

print(f"Response: {record_json}")
print(f"Overall Accuracy: {overall_accuracy_percent:.2f}%")

# Save accuracy result to a file
accuracy_file_path = f"{subfolder_path}/Accuracy.txt"

# Same layout as before: raw response, overall accuracy, then the full accuracy detail
report = "\n".join([
    "Response:",
    response_json_str,
    "",
    f"Overall Accuracy: {overall_accuracy_percent:.2f}%",
    "",
    "Accuracy Detail:",
    str(accuracy),
])

# Explicit UTF-8: the response is model-generated text and may contain characters
# that the platform's default encoding cannot represent
with open(accuracy_file_path, "w", encoding="utf-8") as f:
    f.write(report)

print(f"Accuracy result saved to {accuracy_file_path}")