### Please install the required Python modules/SDKs

In [1]:
# Shell escape: runs `activate ai-azure-c1` as a shell command.
# NOTE(review): a shell escape presumably runs in a child process and so cannot
# switch the conda environment of the already-running kernel - confirm whether
# this line has any effect here.
! activate ai-azure-c1

import sys

# Append the ai-azure-c1 conda environment's site-packages directory to the
# module search path so imports can also be resolved from there.
# The path is absolute and pinned to python3.8, so it must be adjusted if the
# environment lives elsewhere or uses a different Python version.
sys.path.append("/opt/conda/envs/ai-azure-c1/lib/python3.8/site-packages")

## Step 2: Text Data Extraction Using Azure Form Recognizer

In [2]:
import os
from azure.core.exceptions import ResourceNotFoundError
from azure.ai.formrecognizer import FormRecognizerClient
from azure.ai.formrecognizer import FormTrainingClient
from azure.core.credentials import AzureKeyCredential

In [3]:
# Azure Form Recognizer endpoint and key.
# The key is a secret and must never be stored in the notebook, so both values
# are read from environment variables:
#   FR_ENDPOINT - resource endpoint (falls back to the original demo resource)
#   FR_KEY      - API key (deliberately has no fallback)
# SECURITY: a key used to be hardcoded in this cell; treat that key as leaked
# and regenerate it in the Azure portal.
FR_ENDPOINT = os.environ.get('FR_ENDPOINT', 'https://anthrecognizer.cognitiveservices.azure.com/')
FR_KEY = os.environ.get('FR_KEY', '')
if not FR_KEY:
    # Warn right away instead of failing later with an authentication error.
    print("WARNING: FR_KEY is not set; export your Form Recognizer key before creating the clients.")

### Step 2.1. Extract Digital ID Information With Pre-trained Model

In [4]:
def get_card_information(card):
    """Print every field recognized on an identity card.

    Writes a header line, then one line per entry of ``card.fields`` showing
    the field name, its ``value`` and its ``confidence``.

    Args:
        card: Object exposing ``fields``, a mapping from field name to an
            object with ``value`` and ``confidence`` attributes.

    Returns:
        None. The information is only printed.
    """
    line_template = "Param: {} | Detect context: {} |Confidence Scorce: {}"
    print("Card information had detected: ")
    # Walk the name/field pairs directly rather than looking each key up again.
    for field_name, field in card.fields.items():
        print(line_template.format(field_name, field.value, field.confidence))

In [5]:
# Client used for recognition calls, authenticated with the key credential.
form_recognizer = FormRecognizerClient(
    endpoint=FR_ENDPOINT,
    credential=AzureKeyCredential(FR_KEY),
)

In [6]:
# URL of the digital ID image to analyze.
# It can be overridden with the DIGITAL_ID_URL environment variable so a fresh
# SAS URL can be supplied without editing the notebook.
# NOTE: the SAS token in the fallback URL carries se=2022-07-20T15:45:44Z (its
# expiry time); the fallback is kept only for backward compatibility.
digital_id_url = os.environ.get(
    'DIGITAL_ID_URL',
    'https://anhthresource.blob.core.windows.net/resource/ca-dl-avkash.png?sp=racwdyti&st=2022-07-20T07:45:44Z&se=2022-07-20T15:45:44Z&spr=https&sv=2021-06-08&sr=b&sig=wxS48fBx5EOzPMPlzdFFFuZ5WynJ0%2FozhvyoPphMdQg%3D',
)

In [7]:
# Submit the ID image URL for analysis; the call hands back an object whose
# result() is fetched below (despite its name, this variable is not a URL).
digital_id_content_url = (
    form_recognizer
    .begin_recognize_identity_documents_from_url(digital_id_url)
)
# Fetch the final result; later cells index it as a sequence of documents.
digital_id_content = digital_id_content_url.result()

In [8]:
# Print the fields of the first entry in the recognition result.
get_card_information(card=digital_id_content[0])

Card information had detected: 
Param: Address | Detect context: 1234 Circle Ave, Apt 123 San Mateo, CA, 94401 |Confidence Scorce: 0.617
Param: CountryRegion | Detect context: USA |Confidence Scorce: 0.99
Param: DateOfBirth | Detect context: 1990-01-01 |Confidence Scorce: 0.995
Param: DateOfExpiration | Detect context: 2025-01-01 |Confidence Scorce: 0.99
Param: DocumentNumber | Detect context: D1234578 |Confidence Scorce: 0.995
Param: FirstName | Detect context: AVKASH CHAUHAN |Confidence Scorce: 0.824
Param: LastName | Detect context: CHAUHAN |Confidence Scorce: 0.908
Param: Region | Detect context: California |Confidence Scorce: 0.973
Param: Sex | Detect context: M |Confidence Scorce: 0.985


### Step 2.2. Build a Custom Boarding Pass Recognizer Model

In [9]:
# Client used to train the custom model, authenticated with the same endpoint
# and key as the recognition client.
form_training_client = FormTrainingClient(
    endpoint=FR_ENDPOINT,
    credential=AzureKeyCredential(FR_KEY),
)

In [10]:
# SAS URL of the blob container that holds the training data.
# It can be overridden with the TRAINING_DATA_URL environment variable so a
# fresh SAS URL can be supplied without editing the notebook.
# NOTE: the SAS token in the fallback URL carries se=2022-07-20T15:47:04Z (its
# expiry time); the fallback is kept only for backward compatibility.
data_train_url = os.environ.get(
    'TRAINING_DATA_URL',
    'https://anhthresource.blob.core.windows.net/anhthocrdata?sp=racwdli&st=2022-07-20T07:47:04Z&se=2022-07-20T15:47:04Z&spr=https&sv=2021-06-08&sr=c&sig=MLUVvRj1wVmxRQlXmW%2B6gbB%2BY7uWz8zQNOAGoFOKje8%3D',
)

#### Training Model

In [11]:
# Start training on the data behind data_train_url (with training labels
# enabled) and bind `model` straight to the finished result, so the name never
# refers to the intermediate object returned by begin_training.
model = form_training_client.begin_training(
    data_train_url,
    use_training_labels=True,
).result()

In [12]:
# Report the accuracy recorded for every field of every submodel.
print("Training results:")
accuracy_template = "Class '{}' had accuracy is {}"
for trained_submodel in model.submodels:
    for field_name, field_info in trained_submodel.fields.items():
        print(accuracy_template.format(field_name, field_info.accuracy))

Training results:
Class 'Baggage' had accuracy is 0.995
Class 'Boarding ime' had accuracy is 0.995
Class 'Carrier' had accuracy is 0.995
Class 'Class' had accuracy is 0.995
Class 'Date' had accuracy is 0.995
Class 'Flight No.' had accuracy is 0.995
Class 'From' had accuracy is 0.995
Class 'Gate' had accuracy is 0.995
Class 'Passenger Name' had accuracy is 0.909
Class 'Seat' had accuracy is 0.995
Class 'Ticket No.' had accuracy is 0.995
Class 'To' had accuracy is 0.909


#### Testing Model

In [13]:
# URL of the boarding pass PDF used to test the custom model.
# It can be overridden with the TEST_FORM_URL environment variable so a fresh
# SAS URL can be supplied without editing the notebook.
# NOTE: the SAS token in the fallback URL carries se=2022-07-20T15:48:21Z (its
# expiry time); the fallback is kept only for backward compatibility.
test_url = os.environ.get(
    "TEST_FORM_URL",
    "https://anhthresource.blob.core.windows.net/resource/1.pdf?sp=racwdyti&st=2022-07-20T07:48:21Z&se=2022-07-20T15:48:21Z&spr=https&sv=2021-06-08&sr=b&sig=s1rsYu0mdeGuPZ7O0T0t0j4kJ5XM0i9IwBPsBlLYJj8%3D",
)

In [14]:
# Run the trained custom model against the test document and keep only the
# first entry of the result, so `test_result` is never bound to the
# intermediate object returned by the begin_* call.
test_result = form_recognizer.begin_recognize_custom_forms_from_url(
    model_id=model.model_id,
    form_url=test_url,
).result()[0]

In [15]:
# Report each recognized field: its label, detected value and confidence.
print("Detected Results: ")
result_template = "Class: '{}' | Context: '{}' |Confidence score: {}"
for field_name, detected in test_result.fields.items():
    # Prefer the label text found on the form; fall back to the field's key.
    if detected.label_data:
        shown_label = detected.label_data.text
    else:
        shown_label = field_name
    print(result_template.format(shown_label, detected.value, detected.confidence))

Detected Results: 
Class: 'Gate' | Context: 'D3' |Confidence score: 0.993
Class: 'Class' | Context: 'A' |Confidence score: 0.992
Class: 'From' | Context: 'Queen' |Confidence score: 0.991
Class: 'Ticket No.' | Context: 'ETK-737268572620C' |Confidence score: 0.991
Class: 'Carrier' | Context: 'UA' |Confidence score: 0.994
Class: 'Date' | Context: 'May 21, 2022' |Confidence score: 0.99
Class: 'Seat' | Context: '30A' |Confidence score: 0.992
Class: 'Baggage' | Context: 'NO' |Confidence score: 0.995
Class: 'To' | Context: 'Washington' |Confidence score: 0.969
Class: 'Passenger Name' | Context: 'Peter Parker' |Confidence score: 0.953
Class: 'Boarding ime' | Context: '09:00 AM PST' |Confidence score: 0.928
Class: 'Flight No.' | Context: '123' |Confidence score: 0.994
