****Important – Do not use in production, for demonstration purposes only – please review the legal notices before continuing****

# Custom models

Extract text, structure, and fields from models trained with your data, so they're tailored to your forms and documents.

In [4]:
!pip install -q azure-ai-formrecognizer --pre --upgrade

In [2]:

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
import GlobalVariables as gv

endpoint = gv.FORM_RECOGNIZER_ENDPOINT
key = gv.FORM_RECOGNIZER_KEY
model_id = gv.FORM_RECOGNIZER_MODEL

### Document 

In [3]:
formUrl = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-REST-api-samples/master/curl/form-recognizer/sample-layout.pdf"

document_analysis_client = DocumentAnalysisClient(
    endpoint=endpoint, credential=AzureKeyCredential(key)
)

# Make sure your document's type is included in the list of document types the custom model can analyze
poller = document_analysis_client.begin_analyze_document_from_url(model_id, formUrl)
result = poller.result()

for idx, document in enumerate(result.documents):
    print("--------Analyzing document #{}--------".format(idx + 1))
    print("Document has type {}".format(document.doc_type))
    print("Document has confidence {}".format(document.confidence))
    print("Document was analyzed by model with ID {}".format(result.model_id))
    for name, field in document.fields.items():
        field_value = field.value if field.value else field.content
        print("......found field of type '{}' with value '{}' and with confidence {}".format(field.value_type, field_value, field.confidence))


# iterate over tables, lines, and selection marks on each page
for page in result.pages:
    print("\nLines found on page {}".format(page.page_number))
    for line in page.lines:
        print("...Line '{}'".format(line.content.encode('utf-8')))
    for word in page.words:
        print(
            "...Word '{}' has a confidence of {}".format(
                word.content.encode('utf-8'), word.confidence
            )
        )
    for selection_mark in page.selection_marks:
        print(
            "...Selection mark is '{}' and has a confidence of {}".format(
                selection_mark.state, selection_mark.confidence
            )
        )

for i, table in enumerate(result.tables):
    print("\nTable {} can be found on page:".format(i + 1))
    for region in table.bounding_regions:
        print("...{}".format(i + 1, region.page_number))
    for cell in table.cells:
        print(
            "...Cell[{}][{}] has content '{}'".format(
                cell.row_index, cell.column_index, cell.content.encode('utf-8')
            )
        )
print("-----------------------------------")


--------Analyzing document #1--------
Document has type RetailIncidentModel:RetailIncidentModel
Document has confidence 0.003
Document was analyzed by model with ID RetailIncidentModel
......found field of type 'string' with value '91-1144442' and with confidence 0.95
......found field of type 'string' with value '☐' and with confidence 0.95
......found field of type 'string' with value 'Securities registered pursuant to Section 12(b) of the Act:' and with confidence 0.064
......found field of type 'string' with value 'WASHINGTON (STATE OF INCORPORATION)' and with confidence 0.07
......found field of type 'string' with value 'SECURITIES AND EXCHANGE COMMISSION' and with confidence 0.95
......found field of type 'string' with value '1934' and with confidence 0.95
......found field of type 'list' with value 'None' and with confidence None

Lines found on page 1
...Line 'b'UNITED STATES''
...Line 'b'SECURITIES AND EXCHANGE COMMISSION''
...Line 'b'Washington, D.C. 20549''
...Line 'b'FORM 1