# Importing Azure Form Recognizer Python modules


In [1]:
import os
from azure.ai.formrecognizer import FormRecognizerClient
from azure.ai.formrecognizer import FormTrainingClient
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv

# Load the Form Recognizer endpoint and key, then instantiate the client


In [2]:
# Load the .env file so its entries are visible through os.getenv.
load_dotenv()

# Read the Form Recognizer endpoint and key from the environment.
AZURE_FORM_RECOGNIZER_ENDPOINT = os.getenv("AZURE_FORM_RECOGNIZER_ENDPOINT")
AZURE_FORM_RECOGNIZER_KEY = os.getenv("AZURE_FORM_RECOGNIZER_KEY")

# os.getenv returns None for an unset variable. Stop here with a message that
# names the missing setting instead of letting None reach the SDK constructors.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("AZURE_FORM_RECOGNIZER_ENDPOINT", AZURE_FORM_RECOGNIZER_ENDPOINT),
        ("AZURE_FORM_RECOGNIZER_KEY", AZURE_FORM_RECOGNIZER_KEY),
    )
    if not setting_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in the environment or in the .env file."
    )

# Short aliases reused by the client constructors in later cells.
endpoint = AZURE_FORM_RECOGNIZER_ENDPOINT
key = AZURE_FORM_RECOGNIZER_KEY

# Client used for the prebuilt ID-document and custom-form recognition calls.
form_recognizer_client = FormRecognizerClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

# ID Card detection


In [3]:
# Image of the ID card to analyze (path is relative to the notebook's working directory).
id_card_file_path = "ca-dl-avkash-chauhan.png"

# Submit the image to the identity-document recognizer; the file only needs to
# stay open while the request is started.
with open(id_card_file_path, "rb") as id_card_file:
    poller = form_recognizer_client.begin_recognize_identity_documents(
        identity_document=id_card_file
    )

# Block until the long-running operation finishes and collect the recognized documents.
id_documents = poller.result()

In [4]:
# Identity-document fields to report, in the order they are printed.
# Defined once: the list is constant and does not depend on the documents.
FIELD_KEYS = [
    "FirstName",
    "LastName",
    "DocumentNumber",
    "DateOfBirth",
    "DateOfExpiration",
    "Sex",
    "Address",
    "CountryRegion",
    "Region",
]

# Print every requested field that was recognized, with its confidence score.
for idx, id_document in enumerate(id_documents):
    print(f"--------Recognizing ID Card document #{idx + 1}--------")

    for field_key in FIELD_KEYS:
        field = id_document.fields.get(field_key)
        if not field:
            # Field was not returned for this document; nothing to print.
            continue

        value = field.value

        # Special handling for the 'Sex' field: drop the first character.
        # Skipped when there is no value, so a missing value cannot raise on slicing.
        # NOTE(review): the saved output of this cell prints an empty Sex value,
        # which suggests the slice removes the whole value -- confirm it is intended.
        if field_key == "Sex" and value is not None:
            value = value[1:]

        print(f"{field_key}: {value} has confidence: {field.confidence}")

--------Recognizing ID Card document #1--------
FirstName: AVKASH CHAUHAN has confidence: 0.76
LastName: CHAUHAN has confidence: 0.883
DocumentNumber: D1234578 has confidence: 0.995
DateOfBirth: 1990-01-01 has confidence: 0.995
DateOfExpiration: 2025-01-01 has confidence: 0.992
Sex:  has confidence: 0.161
Address: 1234 Circle Ave, Apt 123 San Mateo, CA, 94401 has confidence: 0.585
CountryRegion: USA has confidence: 0.99
Region: California has confidence: 0.984


# Train Custom Boarding Pass Recognition


In [5]:
# Client for training and managing custom models; it reuses the endpoint and
# key read from the environment in the earlier configuration cell.
form_training_client = FormTrainingClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

# Request the custom models already stored for this resource.
# NOTE(review): saved_model_list is not read in any of the cells visible here.
saved_model_list = form_training_client.list_custom_models()

In [6]:
# Location of the training data, read from the environment; it is passed to
# begin_training in the next cell.
trainingDataUrl = os.getenv("AZURE_BLOB_CONTAINER_SAS_URI")
if not trainingDataUrl:
    # os.getenv returns None for an unset variable; fail here with a clear
    # message rather than handing None to the training call.
    raise RuntimeError(
        "Missing required environment variable: AZURE_BLOB_CONTAINER_SAS_URI. "
        "Define it in the environment or in the .env file."
    )

In [7]:
# Start training a custom model without label files from the documents at
# trainingDataUrl, then wait for the long-running operation to complete.
training_process = form_training_client.begin_training(
    training_files_url=trainingDataUrl,
    use_training_labels=False,
)
custom_model = training_process.result()

In [8]:
# Show the full repr of the trained model returned by the training operation.
custom_model

CustomFormModel(model_id=ff651388-95a7-4be5-868b-8627c17f7aff, status=ready, training_started_on=2024-04-19 12:23:56+00:00, training_completed_on=2024-04-19 12:24:10+00:00, submodels=[CustomFormSubmodel(accuracy=None, model_id=ff651388-95a7-4be5-868b-8627c17f7aff, fields={'field-0': CustomFormModelField(label=Baggage, name=field-0, accuracy=None), 'field-1': CustomFormModelField(label=Boarding Time, name=field-1, accuracy=None), 'field-2': CustomFormModelField(label=Carrier, name=field-2, accuracy=None), 'field-3': CustomFormModelField(label=Chicago, name=field-3, accuracy=None), 'field-4': CustomFormModelField(label=Class, name=field-4, accuracy=None), 'field-5': CustomFormModelField(label=Date, name=field-5, accuracy=None), 'field-6': CustomFormModelField(label=Flight No., name=field-6, accuracy=None), 'field-7': CustomFormModelField(label=From, name=field-7, accuracy=None), 'field-8': CustomFormModelField(label=From:, name=field-8, accuracy=None), 'field-9': CustomFormModelField(lab

In [9]:
# Identifier of the trained model; it is passed as model_id when recognizing custom forms.
custom_model.model_id

'ff651388-95a7-4be5-868b-8627c17f7aff'

In [10]:
# Status reported for the trained model.
custom_model.status

'ready'

In [11]:
# Timestamp at which training started.
custom_model.training_started_on

datetime.datetime(2024, 4, 19, 12, 23, 56, tzinfo=<isodate.tzinfo.Utc object at 0x00000208D860FDF0>)

In [12]:
# Timestamp at which training completed.
custom_model.training_completed_on

datetime.datetime(2024, 4, 19, 12, 24, 10, tzinfo=<isodate.tzinfo.Utc object at 0x00000208D860FDF0>)

In [18]:
# Per-document training results (name, status, page count, errors).
custom_model.training_documents

[TrainingDocumentInfo(name=boarding-james-webb.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding-james.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding-libby.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding-radha-s-kumar.pdf, status=succeeded, page_count=1, errors=[], model_id=None),
 TrainingDocumentInfo(name=boarding-sameer.pdf, status=succeeded, page_count=1, errors=[], model_id=None)]

In [14]:
# Model properties, including whether this is a composed model.
custom_model.properties

CustomFormModelProperties(is_composed_model=False)

# Extract information using the custom model


In [15]:
# Boarding-pass PDF to analyze with the custom model (path is relative to the working directory).
file_path = "boarding-avkash.pdf"

In [16]:
# Send the boarding pass to the custom model trained above, asking for the
# field elements to be included in the result.
with open(file_path, "rb") as boarding_pass_file:
    poller = form_recognizer_client.begin_recognize_custom_forms(
        model_id=custom_model.model_id,
        form=boarding_pass_file,
        include_field_elements=True,
    )

# Wait for the long-running operation to finish and collect the recognized forms.
forms = poller.result()

In [17]:
# Report every field of each recognized form with its label, value and confidence.
for idx, form in enumerate(forms):
    print(f"--------Recognizing Form #{idx + 1}--------")
    print(f"Form was analyzed with model with ID {form.model_id}")
    for name, field in form.fields.items():
        # Fall back to the field name when no label text was detected.
        label = field.label_data.text if field.label_data else name
        print(
            f"Field '{name}' has label '{label}' with value '{field.value}' "
            f"and a confidence score of {field.confidence}"
        )

--------Recognizing Form #1--------
Form was analyzed with model with ID ff651388-95a7-4be5-868b-8627c17f7aff
Field 'field-0' has label 'Passenger Name' with value 'Avkash Chauhan' and a confidence score of 1.0
Field 'field-1' has label 'Carrier' with value 'UA' and a confidence score of 1.0
Field 'field-2' has label 'Flight No.' with value '234' and a confidence score of 1.0
Field 'field-3' has label 'Class' with value 'E' and a confidence score of 1.0
Field 'field-4' has label 'Passenger Name' with value 'Avkash Chauhan' and a confidence score of 0.36
Field 'field-5' has label 'From:' with value 'San Francisco' and a confidence score of 1.0
Field 'field-6' has label 'Date' with value 'April 20, 2022' and a confidence score of 1.0
Field 'field-7' has label 'Baggage' with value 'NO' and a confidence score of 0.36
Field 'field-8' has label 'Seat' with value '20A' and a confidence score of 0.86
Field 'field-9' has label 'Seat' with value '20A' and a confidence score of 0.36
Field 'field-