### Set up the environment for the required Python modules/SDKs

In [1]:
! activate ai-azure-c1

import sys

sys.path.append("/opt/conda/envs/ai-azure-c1/lib/python3.8/site-packages")

# Step 2

In [2]:
import os
from azure.core.exceptions import ResourceNotFoundError
from azure.ai.formrecognizer import FormRecognizerClient
from azure.ai.formrecognizer import FormTrainingClient
from azure.core.credentials import AzureKeyCredential

In [47]:
# Read the Form Recognizer settings from the environment so that the secret
# key is never stored in the notebook source, its outputs, or version history.
# NOTE(review): the key that used to be hardcoded in this cell should be
# treated as leaked and rotated.
AZURE_FORM_RECOGNIZER_ENDPOINT = os.environ.get(
    "AZURE_FORM_RECOGNIZER_ENDPOINT",
    'https://longthformreg.cognitiveservices.azure.com/',  # non-secret default
)
AZURE_FORM_RECOGNIZER_KEY = os.environ.get("AZURE_FORM_RECOGNIZER_KEY")

# Fail fast with a clear message instead of a later authentication error.
if not AZURE_FORM_RECOGNIZER_KEY:
    raise RuntimeError(
        "Set the AZURE_FORM_RECOGNIZER_KEY environment variable before running this notebook."
    )

## 2.1. Extract digital ID information with the prebuilt ID model

In [48]:
# Client used for recognition requests, authenticated with the endpoint and
# key configured in the previous cell.
form_recognizer_client = FormRecognizerClient(
    endpoint=AZURE_FORM_RECOGNIZER_ENDPOINT,
    credential=AzureKeyCredential(AZURE_FORM_RECOGNIZER_KEY),
)

# Blob URL of the digital driver-license image.
# NOTE(review): the query string embeds a SAS token (`sig=`); consider loading
# this URL from configuration instead of keeping it in the notebook.
avkash_digital_dl_url = (
    'https://longthstorage.blob.core.windows.net/digital-id/ca-dl-avkash-chauhan.png'
    '?sp=r&st=2022-06-09T02:50:22Z&se=2022-06-16T10:50:22Z&spr=https&sv=2020-08-04&sr=b'
    '&sig=dizSg8tep5UbErgrTmzzy7fhfwnMlKeB17rIbqgJ12A%3D'
)

In [49]:
def get_dl_card_details(dl_card):
    """Print every recognized field of an identity document.

    Args:
        dl_card: Object exposing a ``fields`` mapping of field name to an
            entry that carries ``value`` and ``confidence`` attributes.

    Returns:
        None. One header line and one line per field are written to stdout.
    """
    line_template = "{}: {} has confidence: {}"
    print("Detected information: ")
    for field_name, field_entry in dl_card.fields.items():
        print(line_template.format(field_name, field_entry.value, field_entry.confidence))

In [50]:
# Submit the ID image URL to the identity-document recognizer. The returned
# object exposes `.result()`, which is used below to obtain the outcome.
digital_id_content_url = (
    form_recognizer_client
    .begin_recognize_identity_documents_from_url(avkash_digital_dl_url)
)

# Wait for the operation and keep the recognized documents (indexable).
digital_id_content = digital_id_content_url.result()

In [51]:
# Show the fields of the first recognized document. Guard against an empty
# result so that a failed recognition produces a readable message instead of
# an IndexError.
if digital_id_content:
    get_dl_card_details(digital_id_content[0])
else:
    print("No identity document was recognized.")

Detected information: 
Address: 1234 Circle Ave, Apt 123 San Mateo, CA, 94401 has confidence: 0.585
CountryRegion: USA has confidence: 0.99
DateOfBirth: 1990-01-01 has confidence: 0.995
DateOfExpiration: 2025-01-01 has confidence: 0.992
DocumentNumber: D1234578 has confidence: 0.995
FirstName: AVKASH CHAUHAN has confidence: 0.76
LastName: CHAUHAN has confidence: 0.883
Region: California has confidence: 0.984
Sex: X has confidence: 0.161


## 2.2. Build a Custom Boarding Pass Recognizer Model

In [73]:
# Client used to train custom models, sharing the endpoint and key used by
# the recognition client.
form_training_client = FormTrainingClient(
    endpoint=AZURE_FORM_RECOGNIZER_ENDPOINT,
    credential=AzureKeyCredential(AZURE_FORM_RECOGNIZER_KEY),
)

# Blob container URL holding the boarding-pass training data.
# NOTE(review): the query string embeds a SAS token (`sig=`) with `sp=racwdl`
# permissions, which look broader than read/list - confirm what training
# actually needs and consider loading the URL from configuration.
training_bp_data_url = (
    'https://longthstorage.blob.core.windows.net/boarding-pass'
    '?sp=racwdl&st=2022-06-09T02:07:50Z&se=2022-06-16T10:07:50Z&spr=https&sv=2020-08-04&sr=c'
    '&sig=bgS1kLbfnR8brchR3oyja%2FaYqGafDj0crf15cADKr4w%3D'
)

### Training (on every boarding pass except Libby Herold's)

In [74]:
# Start training on the documents in the training container, using the
# labels stored alongside them (`use_training_labels=True`).
training_process = form_training_client.begin_training(
    training_bp_data_url,
    use_training_labels=True,
)

# Wait for training to finish and keep the resulting model description.
custom_form_model = training_process.result()

In [75]:
# Summarize the finished training run: model id plus start/end timestamps.
print("Training custom form model {} is done.\nTraining start from {} to {}".format(custom_form_model.model_id, custom_form_model.training_started_on, custom_form_model.training_completed_on))
print("List of documents use for training: ")
for doc in custom_form_model.training_documents:
    print("Document name: " + str(doc.name))
    # Resolve the error text first. Previously the conditional expression
    # wrapped the whole concatenation (it binds looser than `+`), so a
    # document with errors printed only the error list and lost its status.
    error_text = "None" if not doc.errors else str(doc.errors)
    print("\tStatus: " + doc.status + " --- " + "Error: " + error_text)

Training custom form model 95a82392-0e18-4a2d-87b7-86d0409eb689 is done.
Training start from 2022-06-09 03:35:08+00:00 to 2022-06-09 03:35:10+00:00
List of documents use for training: 
Document name: boarding-avkash.pdf
	Status: succeeded --- Error: None
Document name: boarding-eo.pdf
	Status: succeeded --- Error: None
Document name: boarding-james-webb.pdf
	Status: succeeded --- Error: None
Document name: boarding-james.pdf
	Status: succeeded --- Error: None
Document name: boarding-lamth.pdf
	Status: succeeded --- Error: None
Document name: boarding-longth.pdf
	Status: succeeded --- Error: None
Document name: boarding-luffy.pdf
	Status: succeeded --- Error: None
Document name: boarding-oden.pdf
	Status: succeeded --- Error: None
Document name: boarding-parzival.pdf
	Status: succeeded --- Error: None
Document name: boarding-radha-s-kumar.pdf
	Status: succeeded --- Error: None
Document name: boarding-sameer.pdf
	Status: succeeded --- Error: None
Document name: boarding_saitama.pdf
	Stat

In [76]:
# List every field of the trained model together with its reported accuracy,
# walking the submodels in order.
print("List of field in boarding pass to recognize and accuracy:")
accuracy_template = "'{}' with accuracy: {}"
for submodel in custom_form_model.submodels:
    for field_name, field_info in submodel.fields.items():
        print(accuracy_template.format(field_name, field_info.accuracy))

List of field in boarding pass to recognize and accuracy:
'Baggage' with accuracy: 0.995
'Boarding Time' with accuracy: 0.995
'Carrier' with accuracy: 0.995
'Class' with accuracy: 0.995
'Date' with accuracy: 0.995
'Flight No' with accuracy: 0.995
'From' with accuracy: 0.995
'Gate' with accuracy: 0.917
'Passenger Name' with accuracy: 0.995
'Seat' with accuracy: 0.995
'Ticket No' with accuracy: 0.995
'To' with accuracy: 0.917


### Test on Libby Herold's boarding pass

In [77]:
# Blob URL of the boarding pass used to test the custom model.
# NOTE(review): the query string embeds a SAS token (`sig=`); consider loading
# this URL from configuration instead of keeping it in the notebook.
test_boarding_pass_url = (
    "https://longthstorage.blob.core.windows.net/boarding-pass-test/boarding-libby.pdf"
    "?sp=r&st=2022-06-09T03:34:42Z&se=2022-06-16T11:34:42Z&spr=https&sv=2020-08-04&sr=b"
    "&sig=J%2BqFyRNk3QauNZYD8qJP%2BqryX%2F1wOX%2BCb4UwE5gG8%2F4%3D"
)

# Run the freshly trained custom model against the test document.
test_action = form_recognizer_client.begin_recognize_custom_forms_from_url(
    model_id=custom_form_model.model_id,
    form_url=test_boarding_pass_url,
)

# Wait for completion and keep only the first recognized form.
test_result = test_action.result()[0]

In [78]:
# Report each field recognized on the test boarding pass with its label,
# value and confidence score.
print("Detected fields: ")
report_template = "Field '{}' has label '{}' with value '{}' and a confidence score of {}"
for field_name, form_field in test_result.fields.items():
    # Fall back to the field name when the field carries no label data.
    if form_field.label_data:
        label_text = form_field.label_data.text
    else:
        label_text = field_name
    print(report_template.format(
        field_name,
        label_text,
        form_field.value,
        form_field.confidence,
    ))

Detected fields: 
Field 'Ticket No' has label 'Ticket No' with value 'ETK-34236749B' and a confidence score of 0.991
Field 'Boarding Time' has label 'Boarding Time' with value '10:00 AM PST' and a confidence score of 0.99
Field 'To' has label 'To' with value 'Chicago' and a confidence score of 0.97
Field 'Class' has label 'Class' with value 'B' and a confidence score of 0.99
Field 'Flight No' has label 'Flight No' with value '234' and a confidence score of 0.994
Field 'Carrier' has label 'Carrier' with value 'UA' and a confidence score of 0.994
Field 'From' has label 'From' with value 'San Francisco' and a confidence score of 0.99
Field 'Gate' has label 'Gate' with value 'G1' and a confidence score of 0.971
Field 'Passenger Name' has label 'Passenger Name' with value 'Libby Herold' and a confidence score of 0.992
Field 'Baggage' has label 'Baggage' with value 'YES' and a confidence score of 0.995
Field 'Seat' has label 'Seat' with value '3D' and a confidence score of 0.99
Field 'Date' 