In [25]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
from PIL import Image, ImageDraw
from io import BytesIO
from azure.keyvault.secrets import SecretClient
from azure.identity import DefaultAzureCredential
import matplotlib.pyplot as plt
import os
import pandas as pd
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import adlfs

# Read all ID cards from local files

In [14]:
# Directory holding the sample ID card images (PNG files).
id_dir = "../material_preparation_step/IDs/"

# Open every PNG in the directory in binary mode. The handles are intentionally
# left open here: the Form Recognizer cell further down reads from them.
id_imgs = []
for filename in sorted(os.listdir(id_dir)):  # sorted -> same order on every platform and run
    path = os.path.join(id_dir, filename)
    # Skip sub-directories and non-PNG entries (e.g. .DS_Store); the recognizer
    # call later in this notebook submits every item as "image/png".
    if not (os.path.isfile(path) and filename.lower().endswith(".png")):
        continue
    print(filename)
    id_imgs.append(open(path, "rb"))

ca-dl-avkash.png
ca-dl-james-jackson.png
ca-dl-james-webb.png
ca-dl-johannes_czylwik.png
ca-dl-libby-herold.png
ca-dl-radha-s-kumar.png
ca-dl-sameer-kumar.png


# Retrieve details using Azure Form Recognizer

## Azure authentication
Run `az login` in a terminal before executing the cells below, so that `DefaultAzureCredential` can authenticate.

In [15]:
# Key Vault that holds the Form Recognizer access key.
keyVaultName = "cvprojectkeyvault"
KVUri = "https://" + keyVaultName + ".vault.azure.net"

# Credential used for Key Vault access (see the `az login` note above).
credential = DefaultAzureCredential()
client = SecretClient(credential=credential, vault_url=KVUri)

In [16]:
# Form Recognizer endpoint; the access key is read from Key Vault instead of being hardcoded.
AZURE_FORM_RECOGNIZER_ENDPOINT = "https://cvprojectformrecognizer.cognitiveservices.azure.com/"
secretName = "formrecognizerkey1"
retrieved_secret = client.get_secret(secretName)

endpoint, key = AZURE_FORM_RECOGNIZER_ENDPOINT, retrieved_secret.value
form_recognizer_client = FormRecognizerClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key),
)

## Call Form Recognizer to receive ID details

In [17]:
# (recognizer field name, label used in the printed log line, key in the result dict)
_ID_CARD_FIELDS = (
    ("FirstName", "First Name", "first_name"),
    ("LastName", "Last Name", "last_name"),
    ("DocumentNumber", "Document Number", "document_number"),
    ("DateOfBirth", "Date of Birth", "dob"),
    ("DateOfExpiration", "Date of Expiration", "doe"),
    ("Sex", "Sex", "sex"),
    ("Address", "Address", "address"),
    ("CountryRegion", "Country/Region", "country_region"),
    ("Region", "Region", "region"),
)


def get_id_card_details(identity_card, verbose=True):
    """Collect the recognized fields of one identity document into a flat dict.

    Args:
        identity_card: Object exposing a ``fields`` mapping (with ``.get``) from
            field name to an entry that has ``value`` and ``confidence``
            attributes.
        verbose: When True (the default, matching the previous behavior), print
            one line per found field with its value and confidence. Pass False
            to avoid echoing personal data into the notebook output.

    Returns:
        dict: Maps the short keys listed in ``_ID_CARD_FIELDS`` to the field
        values. Fields that are absent (or falsy) in ``identity_card.fields``
        are omitted, so the set of keys can differ between documents.
    """
    result = {}
    for field_name, label, key in _ID_CARD_FIELDS:
        field = identity_card.fields.get(field_name)
        if not field:
            # Field was not recognized on this document; leave the key out.
            continue
        if verbose:
            print("{}: {} has confidence: {}".format(label, field.value, field.confidence))
        result[key] = field.value
    return result

In [18]:
# Send every opened ID image to Form Recognizer and collect one record per card.
id_details = []

for img in id_imgs:
    print(img)
    # Rewind first: a previous run of this cell leaves the handle at EOF, and an
    # exhausted stream would be submitted as an empty document.
    img.seek(0)
    id_content_from_file = form_recognizer_client.begin_recognize_identity_documents(img, content_type="image/png")
    collected_id_cards = id_content_from_file.result()
    if not collected_id_cards:
        # Nothing recognized: report which input failed instead of raising IndexError.
        print("No identity document recognized in {}; skipping".format(getattr(img, "name", img)))
        continue
    # Only the first recognized document of each image is used.
    id_details.append(get_id_card_details(collected_id_cards[0]))

<_io.BufferedReader name='../material_preparation_step/IDs/ca-dl-avkash.png'>
First Name: AVKASH CHAUHAN has confidence: 0.824
Last Name: CHAUHAN has confidence: 0.908
Document Number: D1234578 has confidence: 0.995
Date of Birth: 1990-01-01 has confidence: 0.995
Date of Expiration: 2025-01-01 has confidence: 0.99
Sex: M has confidence: 0.985
Address: 1234 Circle Ave, Apt 123 San Mateo, CA, 94401 has confidence: 0.617
Country/Region: USA has confidence: 0.99
Region: California has confidence: 0.973
<_io.BufferedReader name='../material_preparation_step/IDs/ca-dl-james-jackson.png'>
First Name: James has confidence: 0.822
Last Name: Jackson has confidence: 0.843
Document Number: D9753933 has confidence: 0.995
Date of Birth: 1956-10-12 has confidence: 0.995
Date of Expiration: 2027-02-20 has confidence: 0.993
Sex: M has confidence: 0.984
Country/Region: USA has confidence: 0.99
Region: California has confidence: 0.99
<_io.BufferedReader name='../material_preparation_step/IDs/ca-dl-james-

# Store results in a Parquet file in Azure Blob Storage

In [19]:
# Show the raw per-card records (one dict per processed ID) collected by the loop above.
id_details

[{'first_name': 'AVKASH CHAUHAN',
  'last_name': 'CHAUHAN',
  'document_number': 'D1234578',
  'dob': datetime.date(1990, 1, 1),
  'doe': datetime.date(2025, 1, 1),
  'sex': 'M',
  'address': '1234 Circle Ave, Apt 123 San Mateo, CA, 94401',
  'country_region': 'USA',
  'region': 'California'},
 {'first_name': 'James',
  'last_name': 'Jackson',
  'document_number': 'D9753933',
  'dob': datetime.date(1956, 10, 12),
  'doe': datetime.date(2027, 2, 20),
  'sex': 'M',
  'country_region': 'USA',
  'region': 'California'},
 {'first_name': 'James',
  'last_name': 'Webb',
  'document_number': 'D4357789',
  'dob': datetime.date(1970, 12, 15),
  'doe': datetime.date(2026, 1, 10),
  'sex': 'M',
  'country_region': 'USA',
  'region': 'California'},
 {'first_name': 'Johannes',
  'last_name': 'Czylwik',
  'document_number': 'D1234567',
  'dob': datetime.date(1989, 9, 5),
  'doe': datetime.date(2027, 2, 26),
  'sex': 'M',
  'country_region': 'USA',
  'region': 'California'},
 {'first_name': 'Libby',
 

In [21]:
# One row per ID card; a field that is missing on a card becomes a missing value in its column.
df_id_details = pd.DataFrame(data=id_details)
df_id_details

Unnamed: 0,first_name,last_name,document_number,dob,doe,sex,address,country_region,region
0,AVKASH CHAUHAN,CHAUHAN,D1234578,1990-01-01,2025-01-01,M,"1234 Circle Ave, Apt 123 San Mateo, CA, 94401",USA,California
1,James,Jackson,D9753933,1956-10-12,2027-02-20,M,,USA,California
2,James,Webb,D4357789,1970-12-15,2026-01-10,M,,USA,California
3,Johannes,Czylwik,D1234567,1989-09-05,2027-02-26,M,,USA,California
4,Libby,Herold,D7976131,1996-02-10,2028-09-12,F,,USA,California
5,Radha,SKumar,D5567470,1994-03-05,2027-02-20,F,,USA,California
6,Sameer,Kumar,D4556673,1990-01-25,2025-08-28,M,,USA,California


In [47]:
# Storage account that receives the Parquet file.
account_url = "https://computervisionproject.blob.core.windows.net"
default_credential = DefaultAzureCredential()

# Account-level client; container and blob clients are derived from it.
blob_service_client = BlobServiceClient(
    account_url=account_url,
    credential=default_credential,
)

In [48]:
# Serialize the DataFrame to Parquet in memory, then rewind the buffer so a
# subsequent read starts at the first byte.
parquet_file = BytesIO()
df_id_details.to_parquet(parquet_file, engine="pyarrow")
parquet_file.seek(0)

0

In [49]:
# Destination of the Parquet file inside the storage account.
container = 'kiosk'
blob_path = 'extracted_id_details/id_details.parquet'
blob_client = blob_service_client.get_blob_client(blob=blob_path, container=container)

In [50]:
# Rewind so that re-running this cell does not upload an already-consumed (empty)
# stream, and overwrite the existing blob so the notebook survives a second
# "Restart & Run All" (without overwrite=True, upload_blob raises
# ResourceExistsError once the blob exists).
parquet_file.seek(0)
blob_client.upload_blob(data=parquet_file, overwrite=True)

{'etag': '"0x8DC7B28FF6125A0"',
 'last_modified': datetime.datetime(2024, 5, 23, 13, 5, 19, tzinfo=datetime.timezone.utc),
 'content_md5': bytearray(b'\xa8G\xe6J\x1eXj\x8e\x01Q\xf7\x8e\x91\x9b\xaa\xa9'),
 'client_request_id': '1a556135-1905-11ef-b3df-8cf8c5f0a202',
 'request_id': 'a15facc9-301e-004a-6911-adc644000000',
 'version': '2024-05-04',
 'version_id': None,
 'date': datetime.datetime(2024, 5, 23, 13, 5, 18, tzinfo=datetime.timezone.utc),
 'request_server_encrypted': True,
 'encryption_key_sha256': None,
 'encryption_scope': None}