In [2]:
# Prerequisites: pip install azure-ai-formrecognizer python-dotenv

import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
from dotenv import load_dotenv
load_dotenv()

# Service location and secret come from environment variables (load_dotenv()
# is called above, presumably to populate them from a local .env file), so
# nothing sensitive is stored in the notebook. Indexing os.environ directly
# raises KeyError at this point if either variable is missing.
endpoint = os.environ["AZURE_AI_SERVICES_URL"]
key = os.environ["AZURE_AI_SERVICES_KEY"]

# One client, authenticated with the key, is shared by every example below.
credential = AzureKeyCredential(key)
client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)


## Layout Example

In [3]:
layout_url = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"

# Minimum style confidence for text to be reported as handwritten.
# 0.8 is only an example value; tune it for your documents.
HANDWRITTEN_MIN_CONFIDENCE = 0.8

# Corner names, in the order the polygon points are printed.
CORNER_LABELS = ("Upper left", "Upper right", "Lower right", "Lower left")


def print_bounding_box(polygon):
    """Print the first four points of ``polygon`` as labelled corners.

    Args:
        polygon: Sequence of objects exposing ``x`` and ``y`` attributes, or
            ``None``/empty when no polygon is available.

    Prints a placeholder instead of raising when fewer than four points are
    present (NOTE(review): the SDK appears to type ``polygon`` as optional —
    confirm against the installed version).
    """
    print("    Its bounding box is:")
    if not polygon or len(polygon) < len(CORNER_LABELS):
        print("      (not available)")
        return
    # zip stops after the four labels, so any extra points are ignored.
    for label, point in zip(CORNER_LABELS, polygon):
        print(f"      {label} => X: {point.x}, Y= {point.y}")


# Submit the document by URL and block until the analysis has finished.
poller = client.begin_analyze_document_from_url("prebuilt-layout", layout_url)
result = poller.result()

# Per-page summary followed by every text line and selection mark on the page.
for page in result.pages:
    print(f"Document Page {page.page_number} has {len(page.lines)} line(s), {len(page.words)} word(s),")
    print(f"and {len(page.selection_marks)} selection mark(s).")

    for i, line in enumerate(page.lines):
        print(f"  Line {i} has content: '{line.content}'.")
        print_bounding_box(line.polygon)

    for i, selection_mark in enumerate(page.selection_marks):
        print(f"  Selection Mark {i} is {selection_mark.state}.")
        print_bounding_box(selection_mark.polygon)

for style in result.styles:
    # Treat text as handwritten only when the style is flagged as such AND
    # the confidence clears the threshold.
    is_handwritten = style.is_handwritten and style.confidence > HANDWRITTEN_MIN_CONFIDENCE

    if is_handwritten:
        print("Handwritten content found:")
        for span in style.spans:
            # Each span is an (offset, length) window into the full document text.
            print(f"  Content: {result.content[span.offset:span.offset+span.length]}")

print("The following tables were extracted:")

for i, table in enumerate(result.tables):
    print(f"  Table {i} has {table.row_count} rows and {table.column_count} columns.")
    for cell in table.cells:
        print(f"    Cell ({cell.row_index}, {cell.column_index}) has kind '{cell.kind}' and content: '{cell.content}'.")

Document Page 1 has 67 line(s), 413 word(s),
and 14 selection mark(s).
  Line 0 has content: 'UNITED STATES'.
    Its bounding box is:
      Upper left => X: 3.4669, Y= 0.6636
      Upper right => X: 5.0236, Y= 0.6589
      Lower right => X: 5.0236, Y= 0.8451
      Lower left => X: 3.4669, Y= 0.8498
  Line 1 has content: 'SECURITIES AND EXCHANGE COMMISSION'.
    Its bounding box is:
      Upper left => X: 2.168, Y= 0.8737
      Upper right => X: 6.3129, Y= 0.8737
      Lower right => X: 6.3129, Y= 1.0647
      Lower left => X: 2.168, Y= 1.0647
  Line 2 has content: 'Washington, D.C. 20549'.
    Its bounding box is:
      Upper left => X: 3.443, Y= 1.0885
      Upper right => X: 5.057, Y= 1.0838
      Lower right => X: 5.057, Y= 1.2509
      Lower left => X: 3.443, Y= 1.2604
  Line 3 has content: 'FORM 10-Q'.
    Its bounding box is:
      Upper left => X: 3.7104, Y= 1.3893
      Upper right => X: 4.7944, Y= 1.3893
      Lower right => X: 4.7944, Y= 1.5898
      Lower left => X: 3.7104,

## Invoice Examples

In [4]:
invoice_url = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-invoice.pdf"

# Run the prebuilt invoice model on the sample PDF and wait for the result.
poller = client.begin_analyze_document_from_url("prebuilt-invoice", invoice_url)
result = poller.result()

for i, document in enumerate(result.documents):
    print(f"Document {i}:")
    fields = document.fields  # field name -> extracted field for this invoice

    # The two party names are printed only when their value type is "string".
    if "VendorName" in fields:
        vendor_name_field = fields["VendorName"]
        if vendor_name_field.value_type == "string":
            vendor_name = vendor_name_field.value
            print(f"Vendor Name: '{vendor_name}', with confidence {vendor_name_field.confidence}")

    if "CustomerName" in fields:
        customer_name_field = fields["CustomerName"]
        if customer_name_field.value_type == "string":
            customer_name = customer_name_field.value
            print(f"Customer Name: '{customer_name}', with confidence {customer_name_field.confidence}")

    # Line items: walked only when "Items" is a list; each entry announces
    # itself and is then expanded only if it is a dictionary of sub-fields.
    if "Items" in fields:
        items_field = fields["Items"]
        entries = items_field.value if items_field.value_type == "list" else []
        for item_field in entries:
            print("Item:")
            if item_field.value_type != "dictionary":
                continue
            item_fields = item_field.value

            if "Description" in item_fields:
                item_description_field = item_fields["Description"]
                item_description = item_description_field.value
                print(f"  Description: '{item_description}', with confidence {item_description_field.confidence}")

            if "Amount" in item_fields:
                item_amount_field = item_fields["Amount"]
                item_amount = item_amount_field.value
                print(f"  Amount: '{item_amount}', with confidence {item_amount_field.confidence}")

    # Totals are printed whenever the field is present, with no type check.
    if "SubTotal" in fields:
        sub_total_field = fields["SubTotal"]
        sub_total = sub_total_field.value
        print(f"Sub Total: '{sub_total}', with confidence {sub_total_field.confidence}")

    if "TotalTax" in fields:
        total_tax_field = fields["TotalTax"]
        total_tax = total_tax_field.value
        print(f"Total Tax: '{total_tax}', with confidence {total_tax_field.confidence}")

    if "InvoiceTotal" in fields:
        invoice_total_field = fields["InvoiceTotal"]
        invoice_total = invoice_total_field.value
        print(f"Invoice Total: '{invoice_total}', with confidence {invoice_total_field.confidence}")

Document 0:
Vendor Name: 'CONTOSO LTD.', with confidence 0.93
Customer Name: 'MICROSOFT CORPORATION', with confidence 0.915
Item:
  Description: 'Test for 23 fields', with confidence 0.914
  Amount: '$100.0', with confidence 0.913
Sub Total: '$100.0', with confidence 0.969
Total Tax: '$10.0', with confidence 0.97
Invoice Total: '$110.0', with confidence 0.97


In [None]:
invoice_url = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-invoice.pdf"

# (printed label, field key) pairs, in the order they are reported.
# Fields printed before the line items:
INVOICE_HEADER_FIELDS = [
    ("Vendor Name", "VendorName"),
    ("Vendor Address", "VendorAddress"),
    ("Vendor Address Recipient", "VendorAddressRecipient"),
    ("Customer Name", "CustomerName"),
    ("Customer Id", "CustomerId"),
    ("Customer Address", "CustomerAddress"),
    ("Customer Address Recipient", "CustomerAddressRecipient"),
    ("Invoice Id", "InvoiceId"),
    ("Invoice Date", "InvoiceDate"),
    ("Invoice Total", "InvoiceTotal"),
    ("Due Date", "DueDate"),
    ("Purchase Order", "PurchaseOrder"),
    ("Billing Address", "BillingAddress"),
    ("Billing Address Recipient", "BillingAddressRecipient"),
    ("Shipping Address", "ShippingAddress"),
    ("Shipping Address Recipient", "ShippingAddressRecipient"),
]

# Sub-fields printed for every line item:
INVOICE_ITEM_FIELDS = [
    ("Description", "Description"),
    ("Quantity", "Quantity"),
    ("Unit", "Unit"),
    ("Unit Price", "UnitPrice"),
    ("Product Code", "ProductCode"),
    ("Date", "Date"),
    ("Tax", "Tax"),
    ("Amount", "Amount"),
]

# Fields printed after the line items:
INVOICE_FOOTER_FIELDS = [
    ("Subtotal", "SubTotal"),
    ("Total Tax", "TotalTax"),
    ("Previous Unpaid Balance", "PreviousUnpaidBalance"),
    ("Amount Due", "AmountDue"),
    ("Service Start Date", "ServiceStartDate"),
    ("Service End Date", "ServiceEndDate"),
    ("Service Address", "ServiceAddress"),
    ("Service Address Recipient", "ServiceAddressRecipient"),
    ("Remittance Address", "RemittanceAddress"),
    ("Remittance Address Recipient", "RemittanceAddressRecipient"),
]


def print_invoice_fields(fields, labelled_keys, prefix=""):
    """Print value and confidence for each listed field that is present.

    Args:
        fields: Mapping of field key to an object exposing ``value`` and
            ``confidence``.
        labelled_keys: Iterable of ``(label, key)`` pairs; ``label`` is the
            text shown, ``key`` is looked up in ``fields``.
        prefix: Text prepended to every printed line (used to indent items).

    Keys that are missing from ``fields`` are skipped silently.
    """
    for label, key in labelled_keys:
        field = fields.get(key)
        if field:
            print(f"{prefix}{label}: {field.value} has confidence: {field.confidence}")


# Submit the sample invoice by URL and block until the analysis has finished.
poller = client.begin_analyze_document_from_url("prebuilt-invoice", invoice_url)
invoices = poller.result()

for invoice_number, invoice in enumerate(invoices.documents, start=1):
    print(f"--------Recognizing invoice #{invoice_number}--------")
    print_invoice_fields(invoice.fields, INVOICE_HEADER_FIELDS)

    print("Invoice items:")
    # "Items" may be absent from an invoice; treat that as no line items
    # instead of failing on ``None.value``.
    items_field = invoice.fields.get("Items")
    items = items_field.value if items_field and items_field.value else []
    # A separate counter name keeps the invoice number from being shadowed.
    for item_number, item in enumerate(items, start=1):
        print(f"...Item #{item_number}")
        # An item without a dictionary value has no sub-fields to report.
        print_invoice_fields(item.value or {}, INVOICE_ITEM_FIELDS, prefix="......")

    print_invoice_fields(invoice.fields, INVOICE_FOOTER_FIELDS)
    print("----------------------------------------")