In [25]:
# Install required dependencies

!pip install azure-ml
!pip install azure-ai-formrecognizer



In [70]:
### Create a copy and instantiate all values

import xlwings as xw
import openpyxl

# Source template and the results workbook that receives the extracted values.
original_file = "../test-invoices/General ledger.xlsx"
new_file = "../results/testing.xlsx"

# Both workbooks are opened through xlwings so the sheet can be copied between them.
original_wb = xw.Book(original_file)
new_wb = xw.Book(new_file)

# Inspect the results workbook as it is currently saved on disk.
wb = openpyxl.load_workbook(new_file)
count = len(wb.sheetnames)

# Only copy and instantiate when creating from scratch.
# Prevents data loss: the check looks for the "Updated" sheet anywhere in the
# workbook rather than only at the active sheet, so an existing "Updated" sheet
# is never copied over again (which would also fail on the duplicate sheet
# name) just because another sheet happened to be active when the file was
# last saved.
if "Updated" not in wb.sheetnames:
    # Copy the first sheet of the template right after the first sheet of the
    # results workbook; the copy therefore becomes sheet 2.
    original_ws = original_wb.sheets(1)
    original_ws.api.Copy(After=new_wb.sheets(1).api)
    new_wb.sheets(2).name = "Updated"

    # Remove the range A10:M18 from the copy; the analysis cell starts writing
    # at row 10.
    new_wb.sheets(2).range("A10:M18").delete()
    new_wb.save()
    

In [71]:
def store_values(row, number, date, recipient, desc, amount):
    """Write one ledger row into the "Updated" sheet of the results workbook.

    Each field argument is either ``None`` (the cell is left untouched) or an
    object exposing ``.value``; for ``amount`` the number written is
    ``amount.value.amount``.

    Columns written on ``row``: A = date, B = the literal "I", C = number,
    E = description, F = recipient, K = amount. The workbook is saved before
    returning.
    """
    book = xw.Book("../results/testing.xlsx")
    sheet = book.sheets["Updated"]

    def write(column, field, extract=lambda f: f.value):
        # Skip fields that were not supplied so the existing cell is kept.
        if field is None:
            return
        sheet[f"{column}{row}"].value = extract(field)

    write("A", date)
    # Column B always receives the marker "I", whatever fields are present.
    sheet[f"B{row}"].value = "I"
    write("C", number)
    write("E", desc)
    write("F", recipient)
    write("K", amount, lambda f: f.value.amount)

    book.save()


In [72]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient

import os
from getpass import getpass

# Store connection information.
# The API key is a secret and must not live in the notebook: it is read from
# the FORM_RECOGNIZER_KEY environment variable, falling back to an interactive
# prompt. The endpoint can be overridden with FORM_RECOGNIZER_ENDPOINT.
# NOTE(review): a key was previously committed in this cell - treat it as
# compromised and rotate it.
endpoint = os.environ.get(
    "FORM_RECOGNIZER_ENDPOINT", "https://docintfeng.cognitiveservices.azure.com/"
)
key = os.environ.get("FORM_RECOGNIZER_KEY") or getpass("Form Recognizer key: ")

fileUri = "https://github.com/YggdrasilSacred/DocToExcel/blob/main/Labfiles/01-prebuild-models/test-invoices/sampleinvoice.pdf?raw=true"
fileLocale = "en-US"
fileModelId = "prebuilt-invoice"

print(f"\nConnecting to Forms Recognizer at: {endpoint}")
print(f"Analyzing invoice at: {fileUri}")

# Create the client

document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

# Analyse the invoice

poller = document_analysis_client.begin_analyze_document_from_url(
    fileModelId, fileUri, locale=fileLocale 
)
invoices = poller.result()

# Display invoice information to the user

# First row of the "Updated" sheet that receives extracted values.
rowcount = 10

for idx, invoice in enumerate(invoices.documents):
    print("\n--------Recognizing invoice #{}--------\n".format(idx + 1))

    ### ----Invoice header details---- ###

    # Invoice number
    invoice_number = invoice.fields.get("InvoiceId")
    if invoice_number:
        print(
            "Invoice Number: {} has confidence: {}".format(
                invoice_number.value, invoice_number.confidence
            )
        )

    # Invoice date
    invoice_date = invoice.fields.get("InvoiceDate")
    if invoice_date:
        print(
            "Invoice Date: {} has confidence: {}".format(
                invoice_date.value, invoice_date.confidence
            )
        )

    ### ----Billing Information---- ###

    # Bill Recipient
    billing_address_recipient = invoice.fields.get("BillingAddressRecipient")
    if billing_address_recipient:
        print(
            "Billing Address Recipient: {} has confidence: {}".format(
                billing_address_recipient.value,
                billing_address_recipient.confidence,
            )
        )

    ### ----Service/Item Details---- ###

    print("Invoice items:")

    # "Items" may be missing from the result; treat that as an empty list
    # instead of failing on `None.value`.
    items_field = invoice.fields.get("Items")
    line_items = items_field.value if items_field and items_field.value else []

    # (description, amount) pairs for this invoice, one per line item. They are
    # collected here so that every item is stored, not only the last one the
    # loop happened to see.
    ledger_rows = []

    for item_idx, item in enumerate(line_items):
        print("...Item #{}".format(item_idx + 1))

        # Description
        item_description = item.value.get("Description")    
        if item_description:
            print(
                "......Description: {} has confidence: {}".format(
                    item_description.value, item_description.confidence
                )
            )

        # Item total amount
        amount = item.value.get("Amount")
        if amount:
            print(
                "......Amount: {} has confidence: {}".format(
                    amount.value, amount.confidence
                )
            )

        ledger_rows.append((item_description, amount))

    print("----------------------------------------")
    print("\nNow storing values...")

    # An invoice without line items still gets one row carrying its header
    # details (number, date, recipient).
    if not ledger_rows:
        ledger_rows.append((None, None))

    # One ledger row per line item, each on its own spreadsheet row.
    for item_description, amount in ledger_rows:
        store_values(rowcount, invoice_number, invoice_date, billing_address_recipient, item_description, amount)
        rowcount += 1

print("\nAnalysis complete.\n")


Connecting to Forms Recognizer at: https://docintfeng.cognitiveservices.azure.com/
Analyzing invoice at: https://github.com/YggdrasilSacred/DocToExcel/blob/main/Labfiles/01-prebuild-models/test-invoices/sampleinvoice.pdf?raw=true

--------Recognizing invoice #1--------

Invoice Number: 12345 has confidence: 0.968
Invoice Date: 2022-06-25 has confidence: 0.93
Invoice items:
...Item #1
......Description: Consultation has confidence: 0.923
......Amount: $300.0 has confidence: 0.923
...Item #2
......Description: Project Draft has confidence: 0.923
......Amount: $2400.0 has confidence: 0.924
...Item #3
......Description: Implementation has confidence: 0.93
......Amount: $2500.0 has confidence: 0.923
...Item #4
......Description: Additional Supplies has confidence: 0.923
......Amount: $750.0 has confidence: 0.922
...Item #5
......Description: Monthly meeting has confidence: 0.923
......Amount: $2000.0 has confidence: 0.922
----------------------------------------

Now storing values...

Ana