In [1]:
!pip install --upgrade pip
!pip install -U botocore

Requirement already up-to-date: pip in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (20.0.2)
Requirement already up-to-date: botocore in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (1.15.21)


In [16]:
import io
from io import BytesIO
import sys
import boto3
from PIL import Image, ImageDraw, ImageFont
import json

In [12]:
sagemaker_client = boto3.client('sagemaker')

## Creating the Workflow Definition 

In [13]:
executionRole = 'arn:aws:iam::053520186210:role/service-role/AmazonSageMaker-ExecutionRole-20191231T143745'
workTeam = 'arn:aws:sagemaker:us-east-1:053520186210:workteam/private-crowd/textract-private-workteam'
flowDefinitionName = 'textract-demo-99-percent-with-important-form-keys-2'
taskUiArn = 'arn:aws:sagemaker:us-east-1:053520186210:human-task-ui/ronnie-test-template-textract-8'
s3OutputPath = 's3://053520186210-aws-textract-testing/output'

### HumanLoop Activation Conditions

In [17]:
humanLoopActivationConditions = json.dumps({
  "Conditions": [
    {
      "And": [
        {
          "ConditionType": "ImportantFormKeyConfidenceCheck",
          "ConditionParameters": {
            "ImportantFormKey": "*",
            "KeyValueBlockConfidenceLessThan": 100,
            "WordBlockConfidenceLessThan": 100
          }
        },
        {
          "ConditionType": "ImportantFormKeyConfidenceCheck",
          "ConditionParameters": {
            "ImportantFormKey": "*",
            "KeyValueBlockConfidenceGreaterThan": 0,
            "WordBlockConfidenceGreaterThan": 0
          }
        }
      ]
    }
  ]
})

In [20]:
create_workflow_definition_response = sagemaker_client.create_flow_definition(
        FlowDefinitionName= flowDefinitionName,
        RoleArn= executionRole,
        HumanLoopConfig= {
            "WorkteamArn": workTeam,
            "HumanTaskUiArn": taskUiArn,
            "TaskCount": 1,
            "TaskDescription": "Document analysis sample task description",
            "TaskTitle": "Document analysis sample task"
        },
        HumanLoopActivationConfig={
            "HumanLoopRequestSource": {
                "AwsManagedHumanLoopRequestSource": "AWS/Textract/AnalyzeDocument/Forms/V1"
            },
            "HumanLoopActivationConditionsConfig": {
                "HumanLoopActivationConditions": humanLoopActivationConditions
            }
        },
        OutputConfig={
            "S3OutputPath" : s3OutputPath
        }
    )
flowDefinitionArn = create_workflow_definition_response['FlowDefinitionArn'] 

## Calling Textract to Analyze Document with A2I

In [21]:
humanLoopName = 'textract-analyze-document-demo-human-loop'

In [22]:
textract_client = boto3.client('textract')

In [23]:
analyze_document_response = client.analyze_document(Document={'S3Object': {'Bucket': '053520186210-aws-textract-testing', 'Name': 'invoice-1.jpg'}},
                                                    FeatureTypes=["TABLES", "FORMS"], 
                                                    HumanLoopConfig={'FlowDefinitionArn':flowDefinitionArn,
                                                                    'HumanLoopName':humanLoopName, 
                                                                    'DataAttributes': { 'ContentClassifiers': [ 'FreeOfPersonallyIdentifiableInformation' ]}}
                                                   )

### Check if Human Loop was Activated

In [26]:
display(analyze_document_response['HumanLoopActivationOutput']['HumanLoopArn'])
display(analyze_document_response['HumanLoopActivationOutput']['HumanLoopActivationReasons'])
display(analyze_document_response['HumanLoopActivationOutput']['HumanLoopActivationConditionsEvaluationResults'])

'arn:aws:sagemaker:us-east-1:053520186210:human-loop/textract-analyze-document-demo-human-loop'

['ConditionsEvaluation']

'{"Conditions":[{"And":[{"ConditionType":"ImportantFormKeyConfidenceCheck","ConditionParameters":{"ImportantFormKey":"*","ImportantFormKeyAliases":[],"KeyValueBlockConfidenceLessThan":100.0,"WordBlockConfidenceLessThan":100.0},"EvaluationResult":true},{"ConditionType":"ImportantFormKeyConfidenceCheck","ConditionParameters":{"ImportantFormKey":"*","ImportantFormKeyAliases":[],"KeyValueBlockConfidenceGreaterThan":0.0,"WordBlockConfidenceGreaterThan":0.0},"EvaluationResult":true}],"EvaluationResult":true}]}'

## Monitoring Human Loop for Completion

In [27]:
a2i_runtime_client = boto3.client('sagemaker-a2i-runtime')

In [29]:
describe_human_loop_response = a2i_runtime_client.describe_human_loop(
    HumanLoopName=humanLoopName
)

In [33]:
display(describe_human_loop_response['HumanLoopStatus'])
display(describe_human_loop_response['HumanLoopOutput'])

'InProgress'

{'OutputS3Uri': 's3://053520186210-aws-textract-testing/output/textract-demo-99-percent-with-important-form-keys-2/2020/03/15/19/26/19/textract-analyze-document-demo-human-loop/output.json'}