### Imports and shared client setup

In [1]:
import os
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential

# Authenticate and create the shared Form Recognizer client used by every
# analyze_* function in this notebook.
#
# The API key is read from the environment so that the secret is never stored
# in the notebook itself. Set AZURE_FORM_RECOGNIZER_KEY (and optionally
# AZURE_FORM_RECOGNIZER_ENDPOINT) before starting the kernel.
# NOTE(review): a key was previously committed in this cell; it should be
# rotated in the Azure portal.
key = os.environ.get("AZURE_FORM_RECOGNIZER_KEY")
if not key:
    raise RuntimeError(
        "Set the AZURE_FORM_RECOGNIZER_KEY environment variable to your "
        "Form Recognizer API key before running this notebook."
    )

# The endpoint is not a secret, so the notebook's resource stays as the default.
endpoint = os.environ.get(
    "AZURE_FORM_RECOGNIZER_ENDPOINT",
    "https://cognitiveservicesactsoft.cognitiveservices.azure.com/",
)

document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)


### Read model

In [9]:
# formatting function
def format_polygon(polygon):
    """Render a polygon as comma-separated "[x, y]" vertex pairs.

    Args:
        polygon: A sequence of point objects exposing ``x`` and ``y``
            attributes, or a falsy value (``None`` / empty).

    Returns:
        ``"N/A"`` when ``polygon`` is falsy; otherwise the vertices, in order,
        formatted as ``"[x, y]"`` and joined with ``", "``.
    """
    if polygon:
        vertices = ("[{}, {}]".format(point.x, point.y) for point in polygon)
        return ", ".join(vertices)
    return "N/A"

def analyze_read(path):
    """Run the "prebuilt-read" model on a document and print the results.

    Args:
        path: An ``http://`` / ``https://`` URL, or the path of a local file.
            URLs are handed to the service by reference; local files are
            opened in binary mode and passed to the client with
            ``locale="en-US"``.

    Prints the document text, one handwritten / no-handwritten line per
    detected style, and for every page its dimensions, each text line with its
    polygon, and each word with its confidence. Returns ``None``.
    """
    # Decide by URL scheme, not by substring: a local file whose name merely
    # contains "http" must still be read from disk.
    if str(path).lower().startswith(("http://", "https://")):
        poller = document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-read", path
        )
    else:
        with open(path, "rb") as f:
            poller = document_analysis_client.begin_analyze_document(
                "prebuilt-read", document=f, locale="en-US"
            )
    result = poller.result()

    print("Document contains content: ", result.content)

    for style in result.styles:
        print(
            "Document contains {} content".format(
                "handwritten" if style.is_handwritten else "no handwritten"
            )
        )

    for page in result.pages:
        print("----Analyzing Read from page #{}----".format(page.page_number))
        print(
            "Page has width: {} and height: {}, measured with unit: {}".format(
                page.width, page.height, page.unit
            )
        )

        for line_idx, line in enumerate(page.lines):
            print(
                "...Line # {} has text content '{}' within bounding box '{}'".format(
                    line_idx,
                    line.content,
                    format_polygon(line.polygon),
                )
            )

        for word in page.words:
            print(
                "...Word '{}' has a confidence of {}".format(
                    word.content, word.confidence
                )
            )

    print("----------------------------------------")

In [10]:
# Run the read model against a sample image referenced by URL.
formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/rest-api/read.png"
analyze_read(formUrl)

Document contains content:  While healthcare is still in the early stages of its Al journey, we are seeing pharmaceutical and other life sciences organizations making major investments in Al and related technologies." TOM LAWRY | National Director for Al, Health and Life Sciences | Microsoft
As pharmaceutical and other life sciences organizations invest in and deploy advanced technologies, they are beginning to see benefits in diverse areas across their organizations. Companies are looking to incorporate automation and continuing smart factory investments to reduce costs in drug discovery, research and development, and manufacturing and supply chain management. Many life sciences organizations are also choosing to stay with more virtual approaches in the "new normal" - particularly in clinical trials and sales and marketing areas.
Enhancing the patient and provider experience
Clinical trial sponsors are continually seeking to make clinical trials faster and to improve the experience fo

### Document model

In [14]:
def format_bounding_region(bounding_regions):
    """Describe a list of bounding regions as "Page #n: <polygon>" entries.

    Args:
        bounding_regions: A sequence of objects exposing ``page_number`` and
            ``polygon`` attributes, or a falsy value (``None`` / empty).

    Returns:
        ``"N/A"`` when ``bounding_regions`` is falsy; otherwise one
        ``"Page #{page_number}: {format_polygon(polygon)}"`` entry per region,
        joined with ``", "``.
    """
    if bounding_regions:
        described = [
            "Page #{}: {}".format(region.page_number, format_polygon(region.polygon))
            for region in bounding_regions
        ]
        return ", ".join(described)
    return "N/A"

def format_polygon(polygon):
    """Render a polygon as comma-separated "[x, y]" vertex pairs.

    Args:
        polygon: A sequence of point objects exposing ``x`` and ``y``
            attributes, or a falsy value (``None`` / empty).

    Returns:
        ``"N/A"`` when ``polygon`` is falsy; otherwise the vertices, in order,
        formatted as ``"[x, y]"`` and joined with ``", "``.
    """
    if polygon:
        vertices = ("[{}, {}]".format(point.x, point.y) for point in polygon)
        return ", ".join(vertices)
    return "N/A"


def analyze_general_documents(path):
    """Run the "prebuilt-document" model on a document and print the results.

    Args:
        path: An ``http://`` / ``https://`` URL, or the path of a local file.
            URLs are handed to the service by reference; local files are
            opened in binary mode and passed to the client with
            ``locale="en-US"``.

    Prints, in order: any handwritten spans, the key-value pairs with their
    bounding regions, per-page lines / words / selection marks, and every
    table with its cells. Returns ``None``.
    """
    # Decide by URL scheme, not by substring: a local file whose name merely
    # contains "http" must still be read from disk.
    if str(path).lower().startswith(("http://", "https://")):
        poller = document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-document", path
        )
    else:
        with open(path, "rb") as f:
            poller = document_analysis_client.begin_analyze_document(
                "prebuilt-document", document=f, locale="en-US"
            )
    result = poller.result()

    for style in result.styles:
        if style.is_handwritten:
            print("Document contains handwritten content: ")
            # Each span is an (offset, length) window into the full text.
            print(",".join([result.content[span.offset:span.offset + span.length] for span in style.spans]))

    print("----Key-value pairs found in document----")
    for kv_pair in result.key_value_pairs:
        # Key and value are reported independently; either may be absent.
        if kv_pair.key:
            print(
                "Key '{}' found within '{}' bounding regions".format(
                    kv_pair.key.content,
                    format_bounding_region(kv_pair.key.bounding_regions),
                )
            )
        if kv_pair.value:
            print(
                "Value '{}' found within '{}' bounding regions\n".format(
                    kv_pair.value.content,
                    format_bounding_region(kv_pair.value.bounding_regions),
                )
            )

    for page in result.pages:
        print("----Analyzing document from page #{}----".format(page.page_number))
        print(
            "Page has width: {} and height: {}, measured with unit: {}".format(
                page.width, page.height, page.unit
            )
        )

        for line_idx, line in enumerate(page.lines):
            print(
                "...Line # {} has text content '{}' within bounding box '{}'".format(
                    line_idx,
                    line.content,
                    format_polygon(line.polygon),
                )
            )

        for word in page.words:
            print(
                "...Word '{}' has a confidence of {}".format(
                    word.content, word.confidence
                )
            )

        for selection_mark in page.selection_marks:
            print(
                "...Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                    selection_mark.state,
                    format_polygon(selection_mark.polygon),
                    selection_mark.confidence,
                )
            )

    for table_idx, table in enumerate(result.tables):
        print(
            "Table # {} has {} rows and {} columns".format(
                table_idx, table.row_count, table.column_count
            )
        )
        for region in table.bounding_regions:
            print(
                "Table # {} location on page: {} is {}".format(
                    table_idx,
                    region.page_number,
                    format_polygon(region.polygon),
                )
            )
        for cell in table.cells:
            print(
                "...Cell[{}][{}] has content '{}'".format(
                    cell.row_index,
                    cell.column_index,
                    cell.content,
                )
            )
            for region in cell.bounding_regions:
                print(
                    "...content on page {} is within bounding box '{}'\n".format(
                        region.page_number,
                        format_polygon(region.polygon),
                    )
                )
    print("----------------------------------------")
    

In [15]:
# Run the general document model against a sample PDF referenced by URL.
docUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"
analyze_general_documents(docUrl)

----Key-value pairs found in document----
Key 'QUARTERLY REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934' found within 'Page #1: [0.996, 1.7804], [7.8449, 1.7804], [7.8449, 2.0559], [0.996, 2.0559]' bounding regions
Value ':selected:' found within 'Page #1: [0.6694, 1.7746], [0.7764, 1.7746], [0.7764, 1.8833], [0.6694, 1.8833]' bounding regions

Key 'For the Quarterly Period Ended' found within 'Page #1: [0.9982, 2.1626], [2.6315, 2.1626], [2.6315, 2.2665], [0.9982, 2.2665]' bounding regions
Value '2020' found within 'Page #1: [3.2152, 2.1636], [3.4543, 2.1636], [3.4543, 2.2446], [3.2152, 2.2446]' bounding regions

Key 'March 31,' found within 'Page #1: [2.678, 2.1636], [3.1736, 2.1636], [3.1736, 2.261], [2.678, 2.261]' bounding regions
Key 'TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934' found within 'Page #1: [0.9929, 2.7029], [7.8449, 2.7029], [7.8449, 2.9792], [0.9929, 2.9792]' bounding regions
Value ':unselect

In [16]:
# Run the general document model against a local PDF (path is relative to the notebook's working directory).
docPath = 'images/Purchase Requisition Form-01.pdf'
analyze_general_documents(docPath)

----Key-value pairs found in document----
Key 'Requisition Number' found within 'Page #1: [0.7439, 1.606], [2.0068, 1.606], [2.0068, 1.7373], [0.7439, 1.7373]' bounding regions
Key 'Date' found within 'Page #1: [6.332, 1.62], [6.6175, 1.62], [6.6175, 1.7185], [6.332, 1.7185]' bounding regions
Key 'Vendor Name' found within 'Page #1: [0.735, 2.0097], [1.6073, 2.0097], [1.6073, 2.1151], [0.735, 2.1151]' bounding regions
Value 'Vender Details' found within 'Page #1: [0.735, 2.4127], [1.6645, 2.4127], [1.6645, 2.5185], [0.735, 2.5185]' bounding regions

Key 'Name:' found within 'Page #1: [0.7462, 2.6266], [1.1343, 2.6266], [1.1343, 2.7248], [0.7462, 2.7248]' bounding regions
Key 'Name:' found within 'Page #1: [4.4419, 2.6266], [4.83, 2.6266], [4.83, 2.7248], [4.4419, 2.7248]' bounding regions
Key 'Address:' found within 'Page #1: [0.7359, 2.8213], [1.2613, 2.8213], [1.2613, 2.9265], [0.7359, 2.9265]' bounding regions
Key 'Address:' found within 'Page #1: [4.4316, 2.8213], [4.957, 2.8213], 

In [19]:
# Run the general document model against a second local PDF form.
docPath = 'images/Return to Work Form-01.pdf'
analyze_general_documents(docPath)

----Key-value pairs found in document----
Key 'Employee Name' found within 'Page #1: [0.3179, 1.271], [1.3165, 1.271], [1.3165, 1.4024], [0.3179, 1.4024]' bounding regions
Key 'Job Title' found within 'Page #1: [4.3228, 1.271], [4.833, 1.271], [4.833, 1.3765], [4.3228, 1.3765]' bounding regions
Key 'Managers Name' found within 'Page #1: [0.3179, 1.4799], [1.3218, 1.4799], [1.3218, 1.6041], [0.3179, 1.6041]' bounding regions
Key 'Date' found within 'Page #1: [4.3352, 1.48], [4.6111, 1.48], [4.6111, 1.5782], [4.3352, 1.5782]' bounding regions
Key 'Day of Absences' found within 'Page #1: [0.5012, 1.6854], [1.5, 1.6854], [1.5, 1.8174], [0.5012, 1.8174]' bounding regions
Key 'Date Returned to Work' found within 'Page #1: [4.3352, 1.686], [5.7708, 1.686], [5.7708, 1.7915], [4.3352, 1.7915]' bounding regions
Key 'Number of Working Days Absent' found within 'Page #1: [0.3179, 1.8854], [2.3344, 1.8854], [2.3344, 2.0174], [0.3179, 2.0174]' bounding regions
Key 'State briefly why you were absent 

### Layout model

In [25]:
def format_polygon(polygon):
    """Render a polygon as comma-separated "[x, y]" vertex pairs.

    Args:
        polygon: A sequence of point objects exposing ``x`` and ``y``
            attributes, or a falsy value (``None`` / empty).

    Returns:
        ``"N/A"`` when ``polygon`` is falsy; otherwise the vertices, in order,
        formatted as ``"[x, y]"`` and joined with ``", "``.
    """
    if polygon:
        vertices = ("[{}, {}]".format(point.x, point.y) for point in polygon)
        return ", ".join(vertices)
    return "N/A"

def analyze_layout(path):
    """Run the "prebuilt-layout" model on a document and print the results.

    Args:
        path: An ``http://`` / ``https://`` URL, or the path of a local file.
            URLs are handed to the service by reference; local files are
            opened in binary mode and passed to the client with
            ``locale="en-US"``.

    Prints one handwritten / no-handwritten line per detected style, then for
    every page its dimensions, each line (with its words and their
    confidences) and each selection mark, and finally every table with its
    cells. Returns ``None``.
    """
    # Decide by URL scheme, not by substring: a local file whose name merely
    # contains "http" must still be read from disk.
    if str(path).lower().startswith(("http://", "https://")):
        poller = document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-layout", path
        )
    else:
        with open(path, "rb") as f:
            poller = document_analysis_client.begin_analyze_document(
                "prebuilt-layout", document=f, locale="en-US"
            )
    result = poller.result()

    for style in result.styles:
        print(
            "Document contains {} content".format(
                "handwritten" if style.is_handwritten else "no handwritten"
            )
        )

    for page in result.pages:
        print("----Analyzing layout from page #{}----".format(page.page_number))
        print(
            "Page has width: {} and height: {}, measured with unit: {}".format(
                page.width, page.height, page.unit
            )
        )

        for line_idx, line in enumerate(page.lines):
            # Words are fetched per line so they can be listed under it.
            words = line.get_words()
            print(
                "...Line # {} has word count {} and text '{}' within bounding box '{}'".format(
                    line_idx,
                    len(words),
                    line.content,
                    format_polygon(line.polygon),
                )
            )

            for word in words:
                print(
                    "......Word '{}' has a confidence of {}".format(
                        word.content, word.confidence
                    )
                )

        for selection_mark in page.selection_marks:
            print(
                "...Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                    selection_mark.state,
                    format_polygon(selection_mark.polygon),
                    selection_mark.confidence,
                )
            )

    for table_idx, table in enumerate(result.tables):
        print(
            "Table # {} has {} rows and {} columns".format(
                table_idx, table.row_count, table.column_count
            )
        )
        for region in table.bounding_regions:
            print(
                "Table # {} location on page: {} is {}".format(
                    table_idx,
                    region.page_number,
                    format_polygon(region.polygon),
                )
            )
        for cell in table.cells:
            print(
                "...Cell[{}][{}] has content '{}'".format(
                    cell.row_index,
                    cell.column_index,
                    cell.content,
                )
            )
            for region in cell.bounding_regions:
                print(
                    "...content on page {} is within bounding box '{}'".format(
                        region.page_number,
                        format_polygon(region.polygon),
                    )
                )

    print("----------------------------------------")

In [26]:
# Run the layout model against a sample PDF referenced by URL.
formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"
analyze_layout(formUrl)

----Analyzing layout from page #1----
Page has width: 8.5 and height: 11.0, measured with unit: inch
...Line # 0 has word count 2 and text 'UNITED STATES' within bounding box '[3.4915, 0.6828], [5.0116, 0.6828], [5.0116, 0.8265], [3.4915, 0.8265]'
......Word 'UNITED' has a confidence of 1.0
......Word 'STATES' has a confidence of 1.0
...Line # 1 has word count 4 and text 'SECURITIES AND EXCHANGE COMMISSION' within bounding box '[2.1937, 0.9061], [6.297, 0.9061], [6.297, 1.0498], [2.1937, 1.0498]'
......Word 'SECURITIES' has a confidence of 1.0
......Word 'AND' has a confidence of 1.0
......Word 'EXCHANGE' has a confidence of 1.0
......Word 'COMMISSION' has a confidence of 1.0
...Line # 2 has word count 3 and text 'Washington, D.C. 20549' within bounding box '[3.4629, 1.1179], [5.031, 1.1179], [5.031, 1.2483], [3.4629, 1.2483]'
......Word 'Washington,' has a confidence of 1.0
......Word 'D.C.' has a confidence of 1.0
......Word '20549' has a confidence of 1.0
...Line # 3 has word count 

In [18]:
# Run the layout model against a local PDF (path is relative to the notebook's working directory).
docPath = 'images/Purchase Requisition Form-01.pdf'
analyze_layout(docPath)

----Analyzing layout from page #1----
Page has width: 8.5 and height: 11.0, measured with unit: inch
...Line # 0 has word count 2 and text 'Ship Via' within bounding box '[1.1784, 3.6231], [1.6763, 3.6231], [1.6763, 3.7543], [1.1784, 3.7543]'
......Word 'Ship' has a confidence of 1.0
......Word 'Via' has a confidence of 1.0
...Line # 1 has word count 2 and text 'Shipping Method' within bounding box '[2.3784, 3.6231], [3.4561, 3.6231], [3.4561, 3.7545], [2.3784, 3.7545]'
......Word 'Shipping' has a confidence of 1.0
......Word 'Method' has a confidence of 1.0
...Line # 2 has word count 2 and text 'Shipping Terms' within bounding box '[4.5124, 3.6231], [5.4798, 3.6231], [5.4798, 3.7545], [4.5124, 3.7545]'
......Word 'Shipping' has a confidence of 1.0
......Word 'Terms' has a confidence of 1.0
...Line # 3 has word count 2 and text 'Delivery Date' within bounding box '[6.5953, 3.6231], [7.4437, 3.6231], [7.4437, 3.7543], [6.5953, 3.7543]'
......Word 'Delivery' has a confidence of 1.0
.....

### Prebuilt model (invoice)

In [6]:
def format_bounding_region(bounding_regions):
    """Describe a list of bounding regions as "Page #n: <polygon>" entries.

    Args:
        bounding_regions: A sequence of objects exposing ``page_number`` and
            ``polygon`` attributes, or a falsy value (``None`` / empty).

    Returns:
        ``"N/A"`` when ``bounding_regions`` is falsy; otherwise one
        ``"Page #{page_number}: {format_polygon(polygon)}"`` entry per region,
        joined with ``", "``.
    """
    if bounding_regions:
        described = [
            "Page #{}: {}".format(region.page_number, format_polygon(region.polygon))
            for region in bounding_regions
        ]
        return ", ".join(described)
    return "N/A"

def format_polygon(polygon):
    """Render a polygon as comma-separated "[x, y]" vertex pairs.

    Args:
        polygon: A sequence of point objects exposing ``x`` and ``y``
            attributes, or a falsy value (``None`` / empty).

    Returns:
        ``"N/A"`` when ``polygon`` is falsy; otherwise the vertices, in order,
        formatted as ``"[x, y]"`` and joined with ``", "``.
    """
    if polygon:
        vertices = ("[{}, {}]".format(point.x, point.y) for point in polygon)
        return ", ".join(vertices)
    return "N/A"

# (field key, printed label) pairs shown before the line items, in output order.
_INVOICE_HEADER_FIELDS = (
    ("VendorName", "Vendor Name"),
    ("VendorAddress", "Vendor Address"),
    ("VendorAddressRecipient", "Vendor Address Recipient"),
    ("CustomerName", "Customer Name"),
    ("CustomerId", "Customer Id"),
    ("CustomerAddress", "Customer Address"),
    ("CustomerAddressRecipient", "Customer Address Recipient"),
    ("InvoiceId", "Invoice Id"),
    ("InvoiceDate", "Invoice Date"),
    ("InvoiceTotal", "Invoice Total"),
    ("DueDate", "Due Date"),
    ("PurchaseOrder", "Purchase Order"),
    ("BillingAddress", "Billing Address"),
    ("BillingAddressRecipient", "Billing Address Recipient"),
    ("ShippingAddress", "Shipping Address"),
    ("ShippingAddressRecipient", "Shipping Address Recipient"),
)

# (field key, printed label) pairs shown for each line item, in output order.
_INVOICE_ITEM_FIELDS = (
    ("Description", "Description"),
    ("Quantity", "Quantity"),
    ("Unit", "Unit"),
    ("UnitPrice", "Unit Price"),
    ("ProductCode", "Product Code"),
    ("Date", "Date"),
    ("Tax", "Tax"),
    ("Amount", "Amount"),
)

# (field key, printed label) pairs shown after the line items, in output order.
_INVOICE_SUMMARY_FIELDS = (
    ("SubTotal", "Subtotal"),
    ("TotalTax", "Total Tax"),
    ("PreviousUnpaidBalance", "Previous Unpaid Balance"),
    ("AmountDue", "Amount Due"),
    ("ServiceStartDate", "Service Start Date"),
    ("ServiceEndDate", "Service End Date"),
    ("ServiceAddress", "Service Address"),
    ("ServiceAddressRecipient", "Service Address Recipient"),
    ("RemittanceAddress", "Remittance Address"),
    ("RemittanceAddressRecipient", "Remittance Address Recipient"),
)


def _print_invoice_field(fields, key, label, indent=""):
    """Print "<label>: <value> has confidence: <confidence>" if ``fields[key]`` is present."""
    field = fields.get(key)
    if field:
        print(
            "{}{}: {} has confidence: {}".format(
                indent, label, field.value, field.confidence
            )
        )


def analyze_invoice(path):
    """Run the "prebuilt-invoice" model on a document and print the results.

    Args:
        path: An ``http://`` / ``https://`` URL, or the path of a local file.
            URLs are handed to the service by reference; local files are
            opened in binary mode and passed to the client with
            ``locale="en-US"``.

    For each recognized invoice, prints the header fields that are present,
    then every line item with its available sub-fields, then the summary
    fields. Fields absent from the result are skipped. Returns ``None``.
    """
    # Decide by URL scheme, not by substring: a local file whose name merely
    # contains "http" must still be read from disk.
    if str(path).lower().startswith(("http://", "https://")):
        poller = document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-invoice", path
        )
    else:
        with open(path, "rb") as f:
            poller = document_analysis_client.begin_analyze_document(
                "prebuilt-invoice", document=f, locale="en-US"
            )
    invoices = poller.result()

    for idx, invoice in enumerate(invoices.documents):
        print("--------Recognizing invoice #{}--------".format(idx + 1))
        for key, label in _INVOICE_HEADER_FIELDS:
            _print_invoice_field(invoice.fields, key, label)

        print("Invoice items:")
        # "Items" is optional like every other field; a document without line
        # items must not abort the report.
        items_field = invoice.fields.get("Items")
        items = (items_field.value if items_field else None) or []
        for item_idx, item in enumerate(items):
            print("...Item #{}".format(item_idx + 1))
            for key, label in _INVOICE_ITEM_FIELDS:
                _print_invoice_field(item.value, key, label, indent="......")

        for key, label in _INVOICE_SUMMARY_FIELDS:
            _print_invoice_field(invoice.fields, key, label)

    print("----------------------------------------")

In [7]:
# Run the invoice model against a sample invoice PDF referenced by URL.
invoiceUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-invoice.pdf"
analyze_invoice(invoiceUrl)

--------Recognizing invoice #1--------
Vendor Name: CONTOSO LTD. has confidence: 0.926
Vendor Address: AddressValue(house_number=123, po_box=None, road=456th St, city=New York, state=NY, postal_code=10001, country_region=None, street_address=123 456th St) has confidence: 0.907
Vendor Address Recipient: Contoso Headquarters has confidence: 0.926
Customer Name: MICROSOFT CORPORATION has confidence: 0.916
Customer Id: CID-12345 has confidence: 0.966
Customer Address: AddressValue(house_number=123, po_box=None, road=Other St, city=Redmond, state=WA, postal_code=98052, country_region=None, street_address=123 Other St) has confidence: 0.908
Customer Address Recipient: Microsoft Corp has confidence: 0.926
Invoice Id: INV-100 has confidence: 0.978
Invoice Date: 2019-11-15 has confidence: 0.978
Invoice Total: $110.0 has confidence: 0.968
Due Date: 2019-12-15 has confidence: 0.976
Purchase Order: PO-3333 has confidence: 0.97
Billing Address: AddressValue(house_number=123, po_box=None, road=Bill 

In [8]:
# Run the invoice model against a local image file (path is relative to the notebook's working directory).
analyze_invoice('images/invoice.jpg')

--------Recognizing invoice #1--------
Vendor Name: CONTOSO LTD. has confidence: 0.926
Vendor Address: AddressValue(house_number=123, po_box=None, road=456th St, city=New York, state=NY, postal_code=10001, country_region=None, street_address=123 456th St) has confidence: 0.907
Vendor Address Recipient: Contoso Headquarters has confidence: 0.926
Customer Name: MICROSOFT CORPORATION has confidence: 0.908
Customer Id: CID-12345 has confidence: 0.961
Customer Address: AddressValue(house_number=123, po_box=None, road=Other St, city=Redmond, state=WA, postal_code=98052, country_region=None, street_address=123 Other St) has confidence: 0.908
Customer Address Recipient: Microsoft Corp has confidence: 0.926
Invoice Id: INV-100 has confidence: 0.978
Invoice Date: 2019-11-15 has confidence: 0.978
Invoice Total: $110.0 has confidence: 0.968
Due Date: 2019-12-15 has confidence: 0.976
Purchase Order: PO-3333 has confidence: 0.97
Billing Address: AddressValue(house_number=123, po_box=None, road=Bill 

In [20]:
# Run the invoice model against the purchase requisition form used with the
# other models in this notebook.
# NOTE(review): judging by the file name this is not an invoice — confirm the
# mismatch is intentional.
docPath = 'images/Purchase Requisition Form-01.pdf'
analyze_invoice(docPath)

--------Recognizing invoice #1--------
Invoice Total: 44997.13 has confidence: 0.978
Invoice items:
...Item #1
......Quantity: 6.0 has confidence: 0.975
......Unit Price: 100.0 has confidence: 0.976
......Amount: 600.0 has confidence: 0.977
...Item #2
......Quantity: 7.0 has confidence: 0.975
......Unit Price: 239.0 has confidence: 0.977
......Amount: 1673.0 has confidence: 0.978
...Item #3
......Quantity: 6.0 has confidence: 0.975
......Unit Price: 240.0 has confidence: 0.977
......Amount: 1440.0 has confidence: 0.978
...Item #4
......Quantity: 8.0 has confidence: 0.975
......Unit Price: 300.0 has confidence: 0.976
......Amount: 2400.0 has confidence: 0.978
...Item #5
......Quantity: 9.0 has confidence: 0.976
......Unit Price: 200.0 has confidence: 0.976
......Amount: 1800.0 has confidence: 0.978
...Item #6
......Quantity: 10.0 has confidence: 0.976
......Unit Price: 100.0 has confidence: 0.976
......Amount: 1000.0 has confidence: 0.977
...Item #7
......Quantity: 12.0 has confidence: 