In [17]:
import os
from azure.core.exceptions import ResourceNotFoundError
from azure.ai.formrecognizer import FormRecognizerClient
from azure.ai.formrecognizer import FormTrainingClient
from azure.core.credentials import AzureKeyCredential
from azure.keyvault.secrets import SecretClient
from azure.identity import DefaultAzureCredential

## Azure authentication
Don't forget to `az login` in cmd!

In [18]:
keyVaultName = "cvprojectkeyvault"
KVUri = f"https://{keyVaultName}.vault.azure.net"

credential = DefaultAzureCredential()
client = SecretClient(vault_url=KVUri, credential=credential)

In [19]:
AZURE_FORM_RECOGNIZER_ENDPOINT = "https://cvprojectformrecognizer.cognitiveservices.azure.com/"
secretName = "formrecognizerkey1"
retrieved_secret = client.get_secret(secretName)

endpoint = AZURE_FORM_RECOGNIZER_ENDPOINT
key = retrieved_secret.value
form_training_client = FormTrainingClient(endpoint=endpoint, credential=AzureKeyCredential(key))

In [20]:
saved_model_list = form_training_client.list_custom_models()
print(saved_model_list)

<iterator object azure.core.paging.ItemPaged at 0x1e8f3f3a350>


## Training data URL

In [21]:
trainingDataUrl = client.get_secret("blobstoragetrainingdataurl").value
#print(trainingDataUrl)

## Labeled training

In [22]:
labeled_training_process = form_training_client.begin_training(trainingDataUrl, use_training_labels=True)
labeled_custom_model = labeled_training_process.result()

In [23]:
labeled_custom_model.model_id

'08bc6a71-bf06-45c6-9a0d-f662f9ad44d5'

In [24]:
labeled_custom_model.status

'ready'

In [25]:
labeled_custom_model.training_documents

[TrainingDocumentInfo(name=boarding_pass_1.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass_10.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass_2.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass_3.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass_4.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass_5.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding_pass_6.pdf, status=succeeded, page_count=1, errors=[], model_id=None)]

In [26]:
for doc in labeled_custom_model.training_documents:
    print("Document name: {}".format(doc.name))
    print("Document status: {}".format(doc.status))
    print("Document page count: {}".format(doc.page_count))
    print("Document errors: {}".format(doc.errors))

Document name: boarding_pass_1.pdf
Document status: succeeded
Document page count: 1
Document errors: []
Document name: boarding_pass_10.pdf
Document status: succeeded
Document page count: 1
Document errors: []
Document name: boarding_pass_2.pdf
Document status: succeeded
Document page count: 1
Document errors: []
Document name: boarding_pass_3.pdf
Document status: succeeded
Document page count: 1
Document errors: []
Document name: boarding_pass_4.pdf
Document status: succeeded
Document page count: 1
Document errors: []
Document name: boarding_pass_5.pdf
Document status: succeeded
Document page count: 1
Document errors: []
Document name: boarding_pass_6.pdf
Document status: succeeded
Document page count: 1
Document errors: []


## Model Testing

In [38]:
test_file = open("../material_preparation_step/BoardingPasses/boarding_pass_johannes.pdf", "rb")

In [39]:
labeled_custom_test_action = form_recognizer_client.begin_recognize_custom_forms(model_id=labeled_custom_model.model_id, form=test_file)

In [41]:
labeled_custom_test_action.status()

'succeeded'

In [42]:
labeled_custom_test_action_result = labeled_custom_test_action.result()

In [43]:
for recognized_content in labeled_custom_test_action_result:
    print("Form type: {}".format(recognized_content.form_type))
    for name, field in recognized_content.fields.items():
        print("Field '{}' has label '{}' with value '{}' and a confidence score of {}".format(
            name,
            field.label_data.text if field.label_data else name,
            field.value,
            field.confidence
        ))

Form type: custom:08bc6a71-bf06-45c6-9a0d-f662f9ad44d5
Field 'From 2' has label 'From 2' with value 'San Francisco' and a confidence score of 0.993
Field 'Seat 2' has label 'Seat 2' with value '16F' and a confidence score of 0.994
Field 'Gate 2' has label 'Gate 2' with value 'G1' and a confidence score of 0.995
Field 'Date 2' has label 'Date 2' with value 'April 20, 2022' and a confidence score of 0.99
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.993
Field 'To 2' has label 'To 2' with value 'Chicago' and a confidence score of 0.994
Field 'Passenger Name 2' has label 'Passenger Name 2' with value 'Johannes Czylwik' and a confidence score of 0.994
Field 'Baggage' has label 'Baggage' with value 'NO' and a confidence score of 0.995
Field 'Passenger Name' has label 'Passenger Name' with value 'Johannes Czylwik' and a confidence score of 0.994
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.995
Field 'Date' has label 'Date' wi