In [1]:
import boto3
import botocore

In [2]:
# Config
REGION = aws_region = boto3.session.Session().region_name # 'us-west-2'

BUCKET = bucket_name = 'andrew-ocr'
document_name = 'jpg-raw/IMG_0102.JPG'

In [3]:
s3 = boto3.client('s3', REGION)

textract = boto3.client('textract', aws_region) 

In [4]:
OUTPUT_PATH = f's3://{BUCKET}/human-review-ocr'

In [5]:
WORKTEAM_ARN= 'arn:aws:sagemaker:us-west-2:652963972038:workteam/private-crowd/ocr-workers'

In [9]:
from sagemaker import get_execution_role

# Setting Role to the default SageMaker Execution Role
ROLE = get_execution_role()
display(ROLE)

Couldn't call 'get_role' to get Role ARN from role name daericks@gmail.com to get Role path.


ValueError: The current AWS identity is not a role: arn:aws:iam::652963972038:user/daericks@gmail.com, therefore it cannot be used as a SageMaker execution role

In [7]:
response = textract.analyze_document(
            Document={'S3Object': {'Bucket': bucket_name, 'Name': document_name}},
            FeatureTypes=["TABLES", "FORMS"],
            HumanLoopConfig={
                'FlowDefinitionArn': 'arn:aws:sagemaker:us-west-2:652963972038:flow-definition/human-review-ocr',
                'HumanLoopName': 'human_loop_first_try',
                'DataAttributes': {'ContentClassifiers': ['FreeOfPersonallyIdentifiableInformation','FreeOfAdultContent']}
            }
          )

InvalidParameterException: An error occurred (InvalidParameterException) when calling the AnalyzeDocument operation: Request has invalid parameters

In [10]:
import boto3
import io
import json
import uuid
import botocore
import time
import botocore

# Amazon SageMaker client
sagemaker = boto3.client('sagemaker', REGION)

# Amazon Textract client
textract = boto3.client('textract', REGION)

# S3 client
s3 = boto3.client('s3', REGION)

# A2I Runtime client
a2i_runtime_client = boto3.client('sagemaker-a2i-runtime', REGION)

In [11]:
import pprint

# Pretty print setup
pp = pprint.PrettyPrinter(indent=2)

# Function to pretty-print AWS SDK responses
def print_response(response):
    if 'ResponseMetadata' in response:
        del response['ResponseMetadata']
    pp.pprint(response)

In [17]:
flowDefinitionArn='arn:aws:sagemaker:us-west-2:652963972038:flow-definition/human-review-ocr'

In [18]:
uniqueId = str(uuid.uuid4())
human_loop_unique_id = uniqueId + '1'

humanLoopConfig = {
    'FlowDefinitionArn':flowDefinitionArn,
    'HumanLoopName':human_loop_unique_id, 
    'DataAttributes': { 'ContentClassifiers': [ 'FreeOfPersonallyIdentifiableInformation' ]}
}

In [20]:
def analyze_document_with_a2i(document_name, bucket):
    response = textract.analyze_document(
        Document={'S3Object': {'Bucket': bucket, 'Name': document_name}},
        FeatureTypes=["TABLES", "FORMS"], 
        HumanLoopConfig=humanLoopConfig
    )
    return response

In [23]:
document= document_name
analyzeDocumentResponse = analyze_document_with_a2i(document, BUCKET)

In [24]:
if 'HumanLoopArn' in analyzeDocumentResponse['HumanLoopActivationOutput']:
    # A human loop has been started!
    print(f'A human loop has been started with ARN: {analyzeDocumentResponse["HumanLoopActivationOutput"]["HumanLoopArn"]}')

A human loop has been started with ARN: arn:aws:sagemaker:us-west-2:652963972038:human-loop/5ec0218b-ed39-439a-8371-2a72ea394a381


In [25]:
all_human_loops_in_workflow = a2i_runtime_client.list_human_loops(FlowDefinitionArn=flowDefinitionArn)['HumanLoopSummaries']

for human_loop in all_human_loops_in_workflow:
    print(f'\nHuman Loop Name: {human_loop["HumanLoopName"]}')
    print(f'Human Loop Status: {human_loop["HumanLoopStatus"]} \n')
    print('\n')


Human Loop Name: 5ec0218b-ed39-439a-8371-2a72ea394a381
Human Loop Status: InProgress 





In [26]:
workteamName = WORKTEAM_ARN[WORKTEAM_ARN.rfind('/') + 1:]
print("Navigate to the private worker portal and do the tasks. Make sure you've invited yourself to your workteam!")
print('https://' + sagemaker.describe_workteam(WorkteamName=workteamName)['Workteam']['SubDomain'])


Navigate to the private worker portal and do the tasks. Make sure you've invited yourself to your workteam!
https://zibdg78f1b.labeling.us-west-2.sagemaker.aws


In [27]:
all_human_loops_in_workflow = a2i_runtime_client.list_human_loops(FlowDefinitionArn=flowDefinitionArn)['HumanLoopSummaries']

completed_loops = []
for human_loop in all_human_loops_in_workflow:
    print(f'\nHuman Loop Name: {human_loop["HumanLoopName"]}')
    print(f'Human Loop Status: {human_loop["HumanLoopStatus"]} \n')
    print('\n')
    if human_loop['HumanLoopStatus'] == 'Completed':
        completed_loops.append(human_loop['HumanLoopName'])


Human Loop Name: 5ec0218b-ed39-439a-8371-2a72ea394a381
Human Loop Status: InProgress 





In [29]:
import re
import pprint
pp = pprint.PrettyPrinter(indent=2)

def retrieve_a2i_results_from_output_s3_uri(bucket, a2i_s3_output_uri):
    '''
    Gets the json file published by A2I and returns a deserialized object
    '''
    splitted_string = re.split('s3://' +  bucket + '/', a2i_s3_output_uri)
    output_bucket_key = splitted_string[1]

    response = s3.get_object(Bucket=bucket, Key=output_bucket_key)
    content = response["Body"].read()
    return json.loads(content)
    

for human_loop_name in completed_loops:

    describe_human_loop_response = a2i_runtime_client.describe_human_loop(
        HumanLoopName=human_loop_name
    )
    
    print(f'\nHuman Loop Name: {describe_human_loop_response["HumanLoopName"]}')
    print(f'Human Loop Status: {describe_human_loop_response["HumanLoopStatus"]}')
    print(f'Human Loop Output Location: : {describe_human_loop_response["HumanLoopOutput"]["OutputS3Uri"]} \n')
    
    # Uncomment below line to print out a2i human answers
    output = retrieve_a2i_results_from_output_s3_uri(BUCKET, describe_human_loop_response['HumanLoopOutput']['OutputS3Uri'])
#     pp.pprint(output)

In [30]:
import webbrowser, os
import json
import boto3
import io
from io import BytesIO
import sys
from pprint import pprint


def get_rows_columns_map(table_result, blocks_map):
    rows = {}
    for relationship in table_result['Relationships']:
        if relationship['Type'] == 'CHILD':
            for child_id in relationship['Ids']:
                cell = blocks_map[child_id]
                if cell['BlockType'] == 'CELL':
                    row_index = cell['RowIndex']
                    col_index = cell['ColumnIndex']
                    if row_index not in rows:
                        # create new row
                        rows[row_index] = {}
                        
                    # get the text value
                    rows[row_index][col_index] = get_text(cell, blocks_map)
    return rows


def get_text(result, blocks_map):
    text = ''
    if 'Relationships' in result:
        for relationship in result['Relationships']:
            if relationship['Type'] == 'CHILD':
                for child_id in relationship['Ids']:
                    word = blocks_map[child_id]
                    if word['BlockType'] == 'WORD':
                        text += word['Text'] + ' '
                    if word['BlockType'] == 'SELECTION_ELEMENT':
                        if word['SelectionStatus'] =='SELECTED':
                            text +=  'X '    
    return text


def get_table_csv_results(file_name):

    with open(file_name, 'rb') as file:
        img_test = file.read()
        bytes_test = bytearray(img_test)
        print('Image loaded', file_name)

    # process using image bytes
    # get the results
    client = boto3.client('textract')

    response = client.analyze_document(Document={'Bytes': bytes_test}, FeatureTypes=['TABLES'])

    # Get the text blocks
    blocks=response['Blocks']
    pprint(blocks)

    blocks_map = {}
    table_blocks = []
    for block in blocks:
        blocks_map[block['Id']] = block
        if block['BlockType'] == "TABLE":
            table_blocks.append(block)

    if len(table_blocks) <= 0:
        return "<b> NO Table FOUND </b>"

    csv = ''
    for index, table in enumerate(table_blocks):
        csv += generate_table_csv(table, blocks_map, index +1)
        csv += '\n\n'

    return csv

def generate_table_csv(table_result, blocks_map, table_index):
    rows = get_rows_columns_map(table_result, blocks_map)

    table_id = 'Table_' + str(table_index)
    
    # get cells.
    csv = 'Table: {0}\n\n'.format(table_id)

    for row_index, cols in rows.items():
        
        for col_index, text in cols.items():
            csv += '{}'.format(text) + ","
        csv += '\n'
        
    csv += '\n\n\n'
    return csv

def main(file_name):
    table_csv = get_table_csv_results(file_name)

    output_file = 'output.csv'

    # replace content
    with open(output_file, "wt") as fout:
        fout.write(table_csv)

    # show the results
    print('CSV OUTPUT FILE: ', output_file)


if __name__ == "__main__":
    file_name = sys.argv[1]
    main(file_name)

FileNotFoundError: [Errno 2] No such file or directory: '-f'

In [31]:
def get_csv(response):
    # Get the text blocks
    blocks=response['Blocks']
    pprint(blocks)

    blocks_map = {}
    table_blocks = []
    for block in blocks:
        blocks_map[block['Id']] = block
        if block['BlockType'] == "TABLE":
            table_blocks.append(block)

    if len(table_blocks) <= 0:
        return "<b> NO Table FOUND </b>"

    csv = ''
    for index, table in enumerate(table_blocks):
        csv += generate_table_csv(table, blocks_map, index +1)
        csv += '\n\n'

    return csv

In [34]:
table_csv = get_csv(analyzeDocumentResponse)

[{'BlockType': 'PAGE',
  'Geometry': {'BoundingBox': {'Height': 0.9640370607376099,
                               'Left': 0.035365909337997437,
                               'Top': 0.025402884930372238,
                               'Width': 0.9238397479057312},
               'Polygon': [{'X': 0.08712485432624817,
                            'Y': 0.025402884930372238},
                           {'X': 0.9392639994621277, 'Y': 0.047021832317113876},
                           {'X': 0.959205687046051, 'Y': 0.9564330577850342},
                           {'X': 0.035365909337997437,
                            'Y': 0.9894399642944336}]},
  'Id': '37e7062d-27d7-4c2c-8801-1facfd7b46f2',
  'Relationships': [{'Ids': ['c080124e-0b9f-4fd1-abf7-e424ca7b15be',
                             '93b97a87-d0fe-4d09-8f59-94c92d09a319',
                             '3729ecc9-c37f-41ce-abb4-66220739dc4d',
                             'e50affba-6f0a-4cc3-8062-04bc589aab15',
                             '

                           {'X': 0.2358238846063614, 'Y': 0.332964152097702}]},
  'Id': 'c24dbdb4-01d8-4b8c-a654-567f2b1153f1',
  'Relationships': [{'Ids': ['e7ec31f1-af9e-4176-be18-f9bc68276592'],
                     'Type': 'CHILD'}],
  'Text': '---'},
 {'BlockType': 'LINE',
  'Confidence': 97.33871459960938,
  'Geometry': {'BoundingBox': {'Height': 0.016832545399665833,
                               'Left': 0.317301481962204,
                               'Top': 0.320131778717041,
                               'Width': 0.03990597277879715},
               'Polygon': [{'X': 0.3178389072418213, 'Y': 0.320131778717041},
                           {'X': 0.3572074770927429, 'Y': 0.32039767503738403},
                           {'X': 0.35672783851623535, 'Y': 0.33696433901786804},
                           {'X': 0.317301481962204, 'Y': 0.33674025535583496}]},
  'Id': 'b3ceceab-e5e5-45fd-9160-961e9cf7723f',
  'Relationships': [{'Ids': ['16690fe3-85a5-408a-aca2-347a39bf69e1'],
        

                               'Width': 0.0719766840338707},
               'Polygon': [{'X': 0.3020953834056854, 'Y': 0.4884309470653534},
                           {'X': 0.3734308183193207, 'Y': 0.4881393611431122},
                           {'X': 0.3729093372821808, 'Y': 0.507343590259552},
                           {'X': 0.3014541268348694, 'Y': 0.507724404335022}]},
  'Id': '98b61143-4224-47f4-a248-42a17d132ce7',
  'Relationships': [{'Ids': ['d7afff92-1bd3-4f92-bc6e-b6b87b67db44',
                             'b9f5e795-8eaf-430c-82df-b0b1d80f1525'],
                     'Type': 'CHILD'}],
  'Text': "City's 2/3"},
 {'BlockType': 'LINE',
  'Confidence': 99.54641723632812,
  'Geometry': {'BoundingBox': {'Height': 0.01905064843595028,
                               'Left': 0.3972044587135315,
                               'Top': 0.4875005781650543,
                               'Width': 0.06997209787368774},
               'Polygon': [{'X': 0.3976753056049347, 'Y': 0.487783491611

                               'Width': 0.19703079760074615},
               'Polygon': [{'X': 0.7028900384902954, 'Y': 0.6252723932266235},
                           {'X': 0.8995585441589355, 'Y': 0.6226687431335449},
                           {'X': 0.8999208807945251, 'Y': 0.6433535218238831},
                           {'X': 0.7029095888137817, 'Y': 0.6462345719337463}]},
  'Id': '64851928-f8f9-41f8-a8d2-081870250c80',
  'Relationships': [{'Ids': ['ba182843-8002-4275-a7e2-7451d6327b43',
                             '55ed9d3c-4604-4eef-8639-5b0450ce948d',
                             '6a1450c3-c047-4676-b2a1-d7195dfc1b96',
                             '23659d0c-d3c5-47cc-ba2e-46363e0a7bf0',
                             'fdeec1a8-1964-4a48-84e2-12b5e8138315'],
                     'Type': 'CHILD'}],
  'Text': '$6,500; Planning Engineer - $6,000.'},
 {'BlockType': 'LINE',
  'Confidence': 13.388219833374023,
  'Geometry': {'BoundingBox': {'Height': 0.017412738874554634,
              

                               'Width': 0.03399805724620819},
               'Polygon': [{'X': 0.48165300488471985, 'Y': 0.7648686766624451},
                           {'X': 0.5155116319656372, 'Y': 0.764119029045105},
                           {'X': 0.5153949856758118, 'Y': 0.7720626592636108},
                           {'X': 0.4815135896205902, 'Y': 0.7728303670883179}]},
  'Id': '470e1ecf-1e44-4c63-ba33-0381ba68e5c3',
  'Relationships': [{'Ids': ['340a146e-5017-4997-9725-026f90ae5b17'],
                     'Type': 'CHILD'}],
  'Text': '---'},
 {'BlockType': 'LINE',
  'Confidence': 37.16981887817383,
  'Geometry': {'BoundingBox': {'Height': 0.006700159516185522,
                               'Left': 0.5948331356048584,
                               'Top': 0.7624481320381165,
                               'Width': 0.03213481232523918},
               'Polygon': [{'X': 0.594881534576416, 'Y': 0.763160228729248},
                           {'X': 0.6269679665565491, 'Y': 0.7624481

               'Polygon': [{'X': 0.7206615805625916, 'Y': 0.857650101184845},
                           {'X': 0.7578430771827698, 'Y': 0.856585681438446},
                           {'X': 0.7579385638237, 'Y': 0.8740774989128113},
                           {'X': 0.7207034230232239, 'Y': 0.8751866817474365}]},
  'Id': 'e369b370-fc8b-4304-879f-265321be7311',
  'Relationships': [{'Ids': ['47658444-0209-4ee1-8f18-b466b5548c2d'],
                     'Type': 'CHILD'}],
  'Text': '8,300'},
 {'BlockType': 'LINE',
  'Confidence': 70.84752655029297,
  'Geometry': {'BoundingBox': {'Height': 0.03532562777400017,
                               'Left': 0.8721034526824951,
                               'Top': 0.8493446707725525,
                               'Width': 0.08059661090373993},
               'Polygon': [{'X': 0.8721034526824951, 'Y': 0.8516227602958679},
                           {'X': 0.9519926309585571, 'Y': 0.8493446707725525},
                           {'X': 0.9527000784873962,

                            'Y': 0.17307277023792267}]},
  'Id': '60625187-5cc9-49e0-8a35-5440eb9b9341',
  'Text': 'Other',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.7433090209961,
  'Geometry': {'BoundingBox': {'Height': 0.01670129783451557,
                               'Left': 0.12879790365695953,
                               'Top': 0.17929184436798096,
                               'Width': 0.008091578260064125},
               'Polygon': [{'X': 0.12961511313915253, 'Y': 0.17929184436798096},
                           {'X': 0.1368894875049591, 'Y': 0.17940562963485718},
                           {'X': 0.1360832154750824, 'Y': 0.19599315524101257},
                           {'X': 0.12879790365695953,
                            'Y': 0.19588690996170044}]},
  'Id': 'fdfb46dc-d5ae-408a-90d9-644bdd13e893',
  'Text': '$',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 98.9921875,
  'Geometry': {'BoundingBox': {'Height': 0.01639925874769

                           {'X': 0.8514984846115112, 'Y': 0.3500155508518219}]},
  'Id': 'f90ab2c8-5105-4edf-8dfa-b7a0a152384d',
  'Text': 'Finance',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 74.38856506347656,
  'Geometry': {'BoundingBox': {'Height': 0.020265741273760796,
                               'Left': 0.8927536010742188,
                               'Top': 0.3376920521259308,
                               'Width': 0.038974862545728683},
               'Polygon': [{'X': 0.8927536010742188, 'Y': 0.3376920521259308},
                           {'X': 0.9313138723373413, 'Y': 0.3379172086715698},
                           {'X': 0.9317284822463989, 'Y': 0.3579578101634979},
                           {'X': 0.8931024074554443, 'Y': 0.3577842116355896}]},
  'Id': '870d866b-51bb-403c-9334-61ef90ecc52a',
  'Text': 'Director-',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 21.22591781616211,
  'Geometry': {'BoundingBox': {'Height': 0.0168419

 {'BlockType': 'WORD',
  'Confidence': 99.98631286621094,
  'Geometry': {'BoundingBox': {'Height': 0.013775497674942017,
                               'Left': 0.8290768265724182,
                               'Top': 0.3901008069515228,
                               'Width': 0.013855589553713799},
               'Polygon': [{'X': 0.8290768265724182, 'Y': 0.3901008069515228},
                           {'X': 0.8427540063858032, 'Y': 0.3901325762271881},
                           {'X': 0.842932403087616, 'Y': 0.40387630462646484},
                           {'X': 0.8292391896247864, 'Y': 0.4038570821285248}]},
  'Id': 'f3ad4ac4-e4f1-4495-9647-8fcd4c696d29',
  'Text': 'of',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 95.9033203125,
  'Geometry': {'BoundingBox': {'Height': 0.021961640566587448,
                               'Left': 0.8461247086524963,
                               'Top': 0.38969701528549194,
                               'Width': 0.07997190207242

                           {'X': 0.14316077530384064, 'Y': 0.5417154431343079},
                           {'X': 0.142933651804924, 'Y': 0.5465916395187378},
                           {'X': 0.13685187697410583,
                            'Y': 0.5466387867927551}]},
  'Id': '44f53b15-5585-409a-9843-7fe5936bd779',
  'Text': '-',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.15485382080078,
  'Geometry': {'BoundingBox': {'Height': 0.005193718243390322,
                               'Left': 0.14933079481124878,
                               'Top': 0.5415539741516113,
                               'Width': 0.006664390675723553},
               'Polygon': [{'X': 0.14956769347190857, 'Y': 0.5416017770767212},
                           {'X': 0.1559951901435852, 'Y': 0.5415539741516113},
                           {'X': 0.15576119720935822, 'Y': 0.546697735786438},
                           {'X': 0.14933079481124878,
                            'Y': 0.54674768447875

                           {'X': 0.856671929359436, 'Y': 0.6401625871658325}]},
  'Id': 'fdeec1a8-1964-4a48-84e2-12b5e8138315',
  'Text': '$6,000.',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 13.388219833374023,
  'Geometry': {'BoundingBox': {'Height': 0.01741272583603859,
                               'Left': 0.1382254660129547,
                               'Top': 0.6524526476860046,
                               'Width': 0.029520509764552116},
               'Polygon': [{'X': 0.13901562988758087, 'Y': 0.6528698205947876},
                           {'X': 0.16774597764015198, 'Y': 0.6524526476860046},
                           {'X': 0.1669982522726059, 'Y': 0.6694164276123047},
                           {'X': 0.1382254660129547, 'Y': 0.6698653697967529}]},
  'Id': 'bef54e3c-4afe-4e8e-9b9a-08a493f92b99',
  'Text': '- 440',
  'TextType': 'HANDWRITING'},
 {'BlockType': 'WORD',
  'Confidence': 85.49058532714844,
  'Geometry': {'BoundingBox': {'Height': 0.018821

                           {'X': 0.8126551508903503, 'Y': 0.7597397565841675},
                           {'X': 0.7963292598724365, 'Y': 0.7601036429405212}]},
  'Id': 'cbe7d3ea-99c1-44e8-83d9-9f0f4d7b38db',
  'Text': '- .',
  'TextType': 'HANDWRITING'},
 {'BlockType': 'WORD',
  'Confidence': 89.23318481445312,
  'Geometry': {'BoundingBox': {'Height': 0.004637797363102436,
                               'Left': 0.8687885999679565,
                               'Top': 0.7516878247261047,
                               'Width': 0.005603189114481211},
               'Polygon': [{'X': 0.8687885999679565, 'Y': 0.7518087029457092},
                           {'X': 0.8743230700492859, 'Y': 0.7516878247261047},
                           {'X': 0.8743917942047119, 'Y': 0.7562029957771301},
                           {'X': 0.8688552379608154, 'Y': 0.7563256025314331}]},
  'Id': '4e587409-ef95-4582-9294-4273ac188442',
  'Text': '-',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence'

 {'BlockType': 'WORD',
  'Confidence': 53.00339889526367,
  'Geometry': {'BoundingBox': {'Height': 0.021069345995783806,
                               'Left': 0.909161388874054,
                               'Top': 0.8363332152366638,
                               'Width': 0.03144632652401924},
               'Polygon': [{'X': 0.909161388874054, 'Y': 0.8371902704238892},
                           {'X': 0.9401928186416626, 'Y': 0.8363332152366638},
                           {'X': 0.9406077265739441, 'Y': 0.8565019369125366},
                           {'X': 0.9095252156257629, 'Y': 0.8574025630950928}]},
  'Id': '4acd349b-4f19-4010-979b-c69a4dc77fb6',
  'Text': 'Officer',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 95.51986694335938,
  'Geometry': {'BoundingBox': {'Height': 0.00498136505484581,
                               'Left': 0.12009652704000473,
                               'Top': 0.8912057280540466,
                               'Width': 0.006588250

  'Id': '0c2e038e-ae02-486b-9d1c-3ada1c2c74e5',
  'RowIndex': 1,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 1,
  'ColumnSpan': 1,
  'Confidence': 74.0131607055664,
  'Geometry': {'BoundingBox': {'Height': 0.05457457900047302,
                               'Left': 0.11789075285196304,
                               'Top': 0.14784200489521027,
                               'Width': 0.1010323092341423},
               'Polygon': [{'X': 0.12055841833353043, 'Y': 0.14784200489521027},
                           {'X': 0.21892306208610535, 'Y': 0.14957645535469055},
                           {'X': 0.21672862768173218, 'Y': 0.2024165838956833},
                           {'X': 0.11789075285196304,
                            'Y': 0.20100842416286469}]},
  'Id': '873f1029-0b80-400a-b580-59abdc0c392b',
  'Relationships': [{'Ids': ['fdfb46dc-d5ae-408a-90d9-644bdd13e893',
                             '9f9c4a5c-1895-4646-9cad-5a840e19b5f9'],
                     'Type': 'CHILD'}],


                            'Y': 0.4488447308540344}]},
  'Id': 'cfe2c078-9526-4503-82d5-aa4c32b376b7',
  'Relationships': [{'Ids': ['0074e1e2-3f4a-4816-bba2-5386d122fc8a'],
                     'Type': 'CHILD'}],
  'RowIndex': 4,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 6,
  'ColumnSpan': 1,
  'Confidence': 81.12728881835938,
  'Geometry': {'BoundingBox': {'Height': 0.14556725323200226,
                               'Left': 0.5699013471603394,
                               'Top': 0.3031274676322937,
                               'Width': 0.11016041785478592},
               'Polygon': [{'X': 0.5714200139045715, 'Y': 0.3031274676322937},
                           {'X': 0.6800617575645447, 'Y': 0.30399447679519653},
                           {'X': 0.6799145340919495, 'Y': 0.44852352142333984},
                           {'X': 0.5699013471603394,
                            'Y': 0.44869473576545715}]},
  'Id': 'ef69d13f-cdcc-4f51-b4a7-8703f5e4fabc',
  'Relationships'

  'Geometry': {'BoundingBox': {'Height': 0.04647768288850784,
                               'Left': 0.09631495177745819,
                               'Top': 0.5845347046852112,
                               'Width': 0.10454439371824265},
               'Polygon': [{'X': 0.09859462827444077, 'Y': 0.5855786204338074},
                           {'X': 0.20085935294628143, 'Y': 0.5845347046852112},
                           {'X': 0.19898496568202972, 'Y': 0.629668116569519},
                           {'X': 0.09631495177745819,
                            'Y': 0.6310123801231384}]},
  'Id': '371cedaa-3481-410c-8f58-3e92b2cee871',
  'Relationships': [{'Ids': ['25eff1d8-1ba8-41c9-97af-07338f645cd7'],
                     'Type': 'CHILD'}],
  'RowIndex': 7,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 2,
  'ColumnSpan': 1,
  'Confidence': 93.99156188964844,
  'Geometry': {'BoundingBox': {'Height': 0.0459829680621624,
                               'Left': 0.19898496568202972,

                           {'X': 0.19402723014354706, 'Y': 0.7490460276603699},
                           {'X': 0.09028466045856476,
                            'Y': 0.7511956691741943}]},
  'Id': '4f09de6c-b06b-41f3-9911-7145468dd5d7',
  'Relationships': [{'Ids': ['f12547a0-e723-4904-9e3c-d8a4bcb70fd6'],
                     'Type': 'CHILD'}],
  'RowIndex': 9,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 2,
  'ColumnSpan': 1,
  'Confidence': 89.65789794921875,
  'Geometry': {'BoundingBox': {'Height': 0.08313155919313431,
                               'Left': 0.19402723014354706,
                               'Top': 0.6659144759178162,
                               'Width': 0.0872211828827858},
               'Polygon': [{'X': 0.1974257379770279, 'Y': 0.6672130823135376},
                           {'X': 0.28124842047691345, 'Y': 0.6659144759178162},
                           {'X': 0.27844133973121643, 'Y': 0.7472968697547913},
                           {'X': 0.194027

  'Geometry': {'BoundingBox': {'Height': 0.06431261450052261,
                               'Left': 0.46599942445755005,
                               'Top': 0.7865821719169617,
                               'Width': 0.10037682950496674},
               'Polygon': [{'X': 0.46715694665908813, 'Y': 0.788932204246521},
                           {'X': 0.566376268863678, 'Y': 0.7865821719169617},
                           {'X': 0.5657341480255127, 'Y': 0.8481307029724121},
                           {'X': 0.46599942445755005,
                            'Y': 0.8508948087692261}]},
  'Id': '46e54715-7ccd-4dec-9857-a4d85c5e7548',
  'Relationships': [{'Ids': ['40ac49b9-b321-4a29-8f6c-1f17a4a4c9c8'],
                     'Type': 'CHILD'}],
  'RowIndex': 11,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 6,
  'ColumnSpan': 1,
  'Confidence': 75.11051177978516,
  'Geometry': {'BoundingBox': {'Height': 0.06422954797744751,
                               'Left': 0.5657341480255127,
 

                     'Type': 'CHILD'}]},
 {'BlockType': 'KEY_VALUE_SET',
  'Confidence': 22.0,
  'EntityTypes': ['KEY'],
  'Geometry': {'BoundingBox': {'Height': 0.022960208356380463,
                               'Left': 0.747612476348877,
                               'Top': 0.22646668553352356,
                               'Width': 0.041656848043203354},
               'Polygon': [{'X': 0.747612476348877, 'Y': 0.22646668553352356},
                           {'X': 0.7890791893005371, 'Y': 0.22701217234134674},
                           {'X': 0.7892693281173706, 'Y': 0.24942690134048462},
                           {'X': 0.7477219104766846,
                            'Y': 0.24894225597381592}]},
  'Id': '7dcb16bf-3dd6-4075-bed5-b5321321aa1e',
  'Relationships': [{'Ids': ['cda3d698-d10a-443f-9e18-a5ba30fe91f7'],
                     'Type': 'VALUE'}]},
 {'BlockType': 'KEY_VALUE_SET',
  'Confidence': 22.0,
  'EntityTypes': ['VALUE'],
  'Geometry': {'BoundingBox': {'Height': 0.018

In [36]:
output_file = 'output.csv'

# replace content
with open(output_file, "wt") as fout:
    fout.write(table_csv)

# show the results
print('CSV OUTPUT FILE: ', output_file)

CSV OUTPUT FILE:  output.csv


In [47]:
def analyze_pdf_document_with_a2i(document_name, bucket):
    response = textract.start_document_analysis(
        DocumentLocation={'S3Object': {'Bucket': bucket, 'Name': document_name}},
        FeatureTypes=["TABLES", "FORMS"], 
#         HumanLoopConfig=humanLoopConfig
    )
    return response

In [52]:
doc= 'jpg-raw/part1-thru0309.pdf'
analyzePDF = analyze_pdf_document_with_a2i(doc, BUCKET)

In [56]:
table_csv = get_csv(analyzePDF)
output_file = 'output-part1.csv'

# replace content
with open(output_file, "wt") as fout:
    fout.write(table_csv)

# show the results
print('CSV OUTPUT FILE: ', output_file)

KeyError: 'Blocks'

In [55]:
analyzePDF

{'JobId': 'e65b673f999d48fdbfbb064a2b3ea62d9e9c57012997686442b9ea7231c914fa',
 'ResponseMetadata': {'RequestId': '8f9b4731-9939-44b1-ac4c-9f8df69caae8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '8f9b4731-9939-44b1-ac4c-9f8df69caae8',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '76',
   'date': 'Thu, 09 Sep 2021 23:39:54 GMT'},
  'RetryAttempts': 0}}

In [57]:
analyzePDF['JobId']

'e65b673f999d48fdbfbb064a2b3ea62d9e9c57012997686442b9ea7231c914fa'

In [58]:
analyzePDF_results= textract.get_document_analysis(
    JobId=analyzePDF['JobId']
)

In [59]:
analyzePDF_results

{'DocumentMetadata': {'Pages': 241},
 'JobStatus': 'SUCCEEDED',
 'NextToken': 'mZEttZNZj0yBDvf+PqnVRkH7IbjaezVAZMfzAZQtU1GhViF/r05C7Rub2RKYi1B49xM7C52FCzWv0BMo0sJYerpAkSpMFaPvXsllJ/Xdyi55HfZ1ng==',
 'Blocks': [{'BlockType': 'PAGE',
   'Geometry': {'BoundingBox': {'Width': 0.988946795463562,
     'Height': 0.976067066192627,
     'Left': 0.005862809717655182,
     'Top': 0.005486753303557634},
    'Polygon': [{'X': 0.005862809717655182, 'Y': 0.9114241003990173},
     {'X': 0.01933825947344303, 'Y': 0.02792367897927761},
     {'X': 0.9848500490188599, 'Y': 0.005486753303557634},
     {'X': 0.994809627532959, 'Y': 0.9815537929534912}]},
   'Id': 'e58e7036-87a4-4d67-b20e-f75890b507cc',
   'Relationships': [{'Type': 'CHILD',
     'Ids': ['8db5b508-4482-49fd-96de-32fbbe36a816',
      '348db637-ab12-4a38-a98f-02c10cdf35e0',
      '4bbb078a-94c6-4c46-95f0-613411a76ab3',
      '0e23d71e-6d51-4025-b703-3a43c67d34c5',
      '6ea0357f-7eb3-49c1-a6f2-9fca0219f529',
      'f9525d88-71d1-403a-a240-9e

In [61]:
import json
with open('pdf-results-part1.txt', 'w') as file:
    file.write(json.dumps(analyzePDF_results)) # use 'json.loads' to do the reverse

In [62]:
analyzePDF_results

{'DocumentMetadata': {'Pages': 241},
 'JobStatus': 'SUCCEEDED',
 'NextToken': 'mZEttZNZj0yBDvf+PqnVRkH7IbjaezVAZMfzAZQtU1GhViF/r05C7Rub2RKYi1B49xM7C52FCzWv0BMo0sJYerpAkSpMFaPvXsllJ/Xdyi55HfZ1ng==',
 'Blocks': [{'BlockType': 'PAGE',
   'Geometry': {'BoundingBox': {'Width': 0.988946795463562,
     'Height': 0.976067066192627,
     'Left': 0.005862809717655182,
     'Top': 0.005486753303557634},
    'Polygon': [{'X': 0.005862809717655182, 'Y': 0.9114241003990173},
     {'X': 0.01933825947344303, 'Y': 0.02792367897927761},
     {'X': 0.9848500490188599, 'Y': 0.005486753303557634},
     {'X': 0.994809627532959, 'Y': 0.9815537929534912}]},
   'Id': 'e58e7036-87a4-4d67-b20e-f75890b507cc',
   'Relationships': [{'Type': 'CHILD',
     'Ids': ['8db5b508-4482-49fd-96de-32fbbe36a816',
      '348db637-ab12-4a38-a98f-02c10cdf35e0',
      '4bbb078a-94c6-4c46-95f0-613411a76ab3',
      '0e23d71e-6d51-4025-b703-3a43c67d34c5',
      '6ea0357f-7eb3-49c1-a6f2-9fca0219f529',
      'f9525d88-71d1-403a-a240-9e

In [64]:
results_csv = get_csv(analyzePDF_results)

[{'BlockType': 'PAGE',
  'Geometry': {'BoundingBox': {'Height': 0.976067066192627,
                               'Left': 0.005862809717655182,
                               'Top': 0.005486753303557634,
                               'Width': 0.988946795463562},
               'Polygon': [{'X': 0.005862809717655182, 'Y': 0.9114241003990173},
                           {'X': 0.01933825947344303, 'Y': 0.02792367897927761},
                           {'X': 0.9848500490188599, 'Y': 0.005486753303557634},
                           {'X': 0.994809627532959, 'Y': 0.9815537929534912}]},
  'Id': 'e58e7036-87a4-4d67-b20e-f75890b507cc',
  'Page': 1,
  'Relationships': [{'Ids': ['8db5b508-4482-49fd-96de-32fbbe36a816',
                             '348db637-ab12-4a38-a98f-02c10cdf35e0',
                             '4bbb078a-94c6-4c46-95f0-613411a76ab3',
                             '0e23d71e-6d51-4025-b703-3a43c67d34c5',
                             '6ea0357f-7eb3-49c1-a6f2-9fca0219f529',
       

                               'Top': 0.26335278153419495,
                               'Width': 0.0038087849970906973},
               'Polygon': [{'X': 0.2778306007385254, 'Y': 0.2680690884590149},
                           {'X': 0.27787014842033386, 'Y': 0.26335278153419495},
                           {'X': 0.2816393971443176, 'Y': 0.2633594572544098},
                           {'X': 0.28160029649734497,
                            'Y': 0.2680776119232178}]},
  'Id': '88cad1e6-932c-4a6f-9d85-daacf151c940',
  'Page': 1,
  'Relationships': [{'Ids': ['09276d29-247f-4f9b-8baf-879a15390ccc'],
                     'Type': 'CHILD'}],
  'Text': '-'},
 {'BlockType': 'LINE',
  'Confidence': 42.684959411621094,
  'Geometry': {'BoundingBox': {'Height': 0.038145728409290314,
                               'Left': 0.27629464864730835,
                               'Top': 0.6490609645843506,
                               'Width': 0.015277405269443989},
               'Polygon': [{'X': 0.276

 {'BlockType': 'LINE',
  'Confidence': 55.202667236328125,
  'Geometry': {'BoundingBox': {'Height': 0.03954208269715309,
                               'Left': 0.35656484961509705,
                               'Top': 0.6486471891403198,
                               'Width': 0.016680080443620682},
               'Polygon': [{'X': 0.35656484961509705, 'Y': 0.6874432563781738},
                           {'X': 0.35680750012397766, 'Y': 0.6486471891403198},
                           {'X': 0.37324491143226624, 'Y': 0.6493269205093384},
                           {'X': 0.37301841378211975,
                            'Y': 0.6881892681121826}]},
  'Id': 'f81d6b98-7a37-442f-a76f-f2a650e3ed41',
  'Page': 1,
  'Relationships': [{'Ids': ['f6df8d1e-df69-4ed5-9d0d-0b5036959b76'],
                     'Type': 'CHILD'}],
  'Text': '7.000'},
 {'BlockType': 'LINE',
  'Confidence': 15.349268913269043,
  'Geometry': {'BoundingBox': {'Height': 0.03685763105750084,
                               'Left

                             '77b2404f-b751-4821-a389-4855298e6d55'],
                     'Type': 'CHILD'}],
  'Text': 'Supt. Water 1'},
 {'BlockType': 'LINE',
  'Confidence': 71.48367309570312,
  'Geometry': {'BoundingBox': {'Height': 0.04271380230784416,
                               'Left': 0.5354583859443665,
                               'Top': 0.9003759622573853,
                               'Width': 0.015150331892073154},
               'Polygon': [{'X': 0.5354583859443665, 'Y': 0.942029595375061},
                           {'X': 0.535525381565094, 'Y': 0.9003759622573853},
                           {'X': 0.5506086945533752, 'Y': 0.9013715386390686},
                           {'X': 0.5505577921867371, 'Y': 0.9430897235870361}]},
  'Id': '89313d1c-777b-4cdb-83da-6480eb6c0d8d',
  'Page': 1,
  'Relationships': [{'Ids': ['b02b0c00-90c2-4912-a3bf-993159d837d8',
                             '4f9c223a-b9a2-4e1a-a03c-5c77c9189d49'],
                     'Type': 'CHILD'}],
  'Tex

  'Relationships': [{'Ids': ['ef0ffab5-48fb-4d19-9ec8-db9c1496bcf7',
                             'e4d029a4-ff64-4eba-91f4-96606b92ee6a',
                             'f63fbe3d-c4a3-4ac4-aa31-9bd684c4b811',
                             'f4146ab5-049a-4af9-b4ae-5752647d3fdd'],
                     'Type': 'CHILD'}],
  'Text': '& Clerk of Council-'},
 {'BlockType': 'LINE',
  'Confidence': 70.43275451660156,
  'Geometry': {'BoundingBox': {'Height': 0.1975463330745697,
                               'Left': 0.6189126372337341,
                               'Top': 0.09978140890598297,
                               'Width': 0.02331325225532055},
               'Polygon': [{'X': 0.6190211772918701, 'Y': 0.29720935225486755},
                           {'X': 0.6189126372337341, 'Y': 0.10011890530586243},
                           {'X': 0.6419970989227295, 'Y': 0.09978140890598297},
                           {'X': 0.6422258615493774,
                            'Y': 0.29732775688171387}]},


  'Confidence': 80.81922149658203,
  'Geometry': {'BoundingBox': {'Height': 0.08260234445333481,
                               'Left': 0.6720513701438904,
                               'Top': 0.05113693326711655,
                               'Width': 0.019778842106461525},
               'Polygon': [{'X': 0.6722118258476257, 'Y': 0.13373927772045135},
                           {'X': 0.6720513701438904, 'Y': 0.05151631310582161},
                           {'X': 0.6916269659996033, 'Y': 0.05113693326711655},
                           {'X': 0.6918302178382874,
                            'Y': 0.13351963460445404}]},
  'Id': 'b25ac636-49dd-46b9-9327-0e937bc3fa26',
  'Page': 1,
  'Relationships': [{'Ids': ['0a646757-9d29-447a-90f0-500cf17863ed',
                             '4bec2cf2-120b-4f05-8472-2bbf8a0b7b75',
                             'deaab02f-136c-4aa3-8458-bd69c4affe6d',
                             'ad2e0620-cbea-47a1-9bc7-888c47ad0419'],
                     'Type': 'CHIL

 {'BlockType': 'LINE',
  'Confidence': 71.10503387451172,
  'Geometry': {'BoundingBox': {'Height': 0.0353824719786644,
                               'Left': 0.8099167346954346,
                               'Top': 0.7375410795211792,
                               'Width': 0.008250913582742214},
               'Polygon': [{'X': 0.810108482837677, 'Y': 0.7725071310997009},
                           {'X': 0.8099167346954346, 'Y': 0.7375410795211792},
                           {'X': 0.8179684281349182, 'Y': 0.7379295229911804},
                           {'X': 0.8181676268577576, 'Y': 0.7729235887527466}]},
  'Id': '5ef74e8d-57ac-4f69-bcfa-32c594419042',
  'Page': 1,
  'Relationships': [{'Ids': ['1d97bf6d-93ce-4470-9c56-e3f18b43319a'],
                     'Type': 'CHILD'}],
  'Text': '---'},
 {'BlockType': 'LINE',
  'Confidence': 99.06380462646484,
  'Geometry': {'BoundingBox': {'Height': 0.04124922305345535,
                               'Left': 0.7971735000610352,
                

  'Geometry': {'BoundingBox': {'Height': 0.04087565094232559,
                               'Left': 0.8606257438659668,
                               'Top': 0.6408515572547913,
                               'Width': 0.019103948026895523},
               'Polygon': [{'X': 0.8608987927436829, 'Y': 0.6809274554252625},
                           {'X': 0.8606257438659668, 'Y': 0.6408515572547913},
                           {'X': 0.8794365525245667, 'Y': 0.6415768265724182},
                           {'X': 0.879729688167572, 'Y': 0.6817271709442139}]},
  'Id': '0f80bee5-0635-4131-a71e-0c91bb04290d',
  'Page': 1,
  'Relationships': [{'Ids': ['93a0e4d0-6cfb-46e0-8b78-18fc82800d1e'],
                     'Type': 'CHILD'}],
  'Text': '8,250'},
 {'BlockType': 'LINE',
  'Confidence': 93.86873626708984,
  'Geometry': {'BoundingBox': {'Height': 0.0413968525826931,
                               'Left': 0.8515415191650391,
                               'Top': 0.0864202082157135,
              

                               'Top': 0.46289360523223877,
                               'Width': 0.01478367205709219},
               'Polygon': [{'X': 0.12526270747184753, 'Y': 0.5217947363853455},
                           {'X': 0.1259872019290924, 'Y': 0.46289360523223877},
                           {'X': 0.1400463879108429, 'Y': 0.463214248418808},
                           {'X': 0.13934242725372314,
                            'Y': 0.5222031474113464}]},
  'Id': 'bb303efa-5320-4927-b119-315fe1ede05c',
  'Page': 1,
  'Text': 'Director',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.95793151855469,
  'Geometry': {'BoundingBox': {'Height': 0.014944156631827354,
                               'Left': 0.1261867731809616,
                               'Top': 0.44416794180870056,
                               'Width': 0.014048195444047451},
               'Polygon': [{'X': 0.1261867731809616, 'Y': 0.4588016867637634},
                           {'X': 0.126366

                           {'X': 0.20919379591941833,
                            'Y': 0.3026292026042938}]},
  'Id': '2f57c881-382c-48be-bf3c-f115f0ab9299',
  'Page': 1,
  'Text': '$10,750',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 27.177291870117188,
  'Geometry': {'BoundingBox': {'Height': 0.020320747047662735,
                               'Left': 0.1994820386171341,
                               'Top': 0.8991186618804932,
                               'Width': 0.01593870297074318},
               'Polygon': [{'X': 0.1994820386171341, 'Y': 0.9183338284492493},
                           {'X': 0.19967924058437347, 'Y': 0.8991186618804932},
                           {'X': 0.21542073786258698, 'Y': 0.900192141532898},
                           {'X': 0.2152310162782669, 'Y': 0.919439435005188}]},
  'Id': '15326734-c6b7-4dfc-9e62-23374fe32035',
  'Page': 1,
  'Text': 'dris',
  'TextType': 'HANDWRITING'},
 {'BlockType': 'WORD',
  'Confidence': 60.353797912597

                            'Y': 0.2995429039001465}]},
  'Id': '42fc1a92-de34-472d-893a-9a891b4c7ed2',
  'Page': 1,
  'Text': 'ments-$7,400;',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 86.18911743164062,
  'Geometry': {'BoundingBox': {'Height': 0.004626244772225618,
                               'Left': 0.3945603370666504,
                               'Top': 0.25718286633491516,
                               'Width': 0.004181636963039637},
               'Polygon': [{'X': 0.3945603370666504, 'Y': 0.26180246472358704},
                           {'X': 0.3945849537849426, 'Y': 0.25718286633491516},
                           {'X': 0.3987419605255127, 'Y': 0.25718751549720764},
                           {'X': 0.39871785044670105,
                            'Y': 0.2618091106414795}]},
  'Id': '30fa21d8-4abc-475d-9eb9-5b7ab1b4d813',
  'Page': 1,
  'Text': '-',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 59.309326171875,
  'Geometry': {'Boun

  'Confidence': 52.56029510498047,
  'Geometry': {'BoundingBox': {'Height': 0.051777590066194534,
                               'Left': 0.4998731315135956,
                               'Top': 0.3142669200897217,
                               'Width': 0.014724982902407646},
               'Polygon': [{'X': 0.4998731315135956, 'Y': 0.3658674657344818},
                           {'X': 0.5000053644180298, 'Y': 0.3142669200897217},
                           {'X': 0.5145981311798096, 'Y': 0.31436747312545776},
                           {'X': 0.5144853591918945, 'Y': 0.3660445213317871}]},
  'Id': '5e4a271b-b66b-45ba-a70f-55bb5069548f',
  'Page': 1,
  'Text': 'Wellare',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 46.23088073730469,
  'Geometry': {'BoundingBox': {'Height': 0.03165040910243988,
                               'Left': 0.5396586656570435,
                               'Top': 0.8437838554382324,
                               'Width': 0.0059854788705706

               'Polygon': [{'X': 0.5812471508979797, 'Y': 0.08025732636451721},
                           {'X': 0.5812587738037109, 'Y': 0.053836219012737274},
                           {'X': 0.5944692492485046, 'Y': 0.05358093976974487},
                           {'X': 0.5944668054580688, 'Y': 0.0800369530916214}]},
  'Id': 'f3384c83-6c30-4396-bf3c-0a1d0c9e3520',
  'Page': 1,
  'Text': 'Finance',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 93.86241149902344,
  'Geometry': {'BoundingBox': {'Height': 0.04035292938351631,
                               'Left': 0.591217041015625,
                               'Top': 0.6416139006614685,
                               'Width': 0.01815195381641388},
               'Polygon': [{'X': 0.591217041015625, 'Y': 0.6811745762825012},
                           {'X': 0.591223955154419, 'Y': 0.6416139006614685},
                           {'X': 0.6093572974205017, 'Y': 0.6423333287239075},
                           {'X': 0.60

               'Polygon': [{'X': 0.6475961804389954, 'Y': 0.8735048770904541},
                           {'X': 0.6475420594215393, 'Y': 0.8312143683433533},
                           {'X': 0.6655675768852234, 'Y': 0.8322665095329285},
                           {'X': 0.6656414866447449, 'Y': 0.874634325504303}]},
  'Id': '765eab87-588e-4053-bc19-ccbf5ba5940a',
  'Page': 1,
  'Text': '13.440',
  'TextType': 'HANDWRITING'},
 {'BlockType': 'WORD',
  'Confidence': 81.03054809570312,
  'Geometry': {'BoundingBox': {'Height': 0.04125352203845978,
                               'Left': 0.6467335820198059,
                               'Top': 0.7238449454307556,
                               'Width': 0.017221558839082718},
               'Polygon': [{'X': 0.6467845439910889, 'Y': 0.7642109394073486},
                           {'X': 0.6467335820198059, 'Y': 0.7238449454307556},
                           {'X': 0.6638861894607544, 'Y': 0.7246624827384949},
                           {'X': 0.

  'Id': '2c4a231b-0613-4750-9751-1e2582522ecb',
  'Page': 1,
  'Text': '---',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 57.58672332763672,
  'Geometry': {'BoundingBox': {'Height': 0.034038152545690536,
                               'Left': 0.6845785975456238,
                               'Top': 0.738944411277771,
                               'Width': 0.0064368401654064655},
               'Polygon': [{'X': 0.684654176235199, 'Y': 0.7726496458053589},
                           {'X': 0.6845785975456238, 'Y': 0.738944411277771},
                           {'X': 0.6909343600273132, 'Y': 0.7392557263374329},
                           {'X': 0.6910154819488525, 'Y': 0.7729825973510742}]},
  'Id': 'b1db2b1f-0c90-49ba-9e0e-faf4dbcb5635',
  'Page': 1,
  'Text': '---',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.89723205566406,
  'Geometry': {'BoundingBox': {'Height': 0.009360519237816334,
                               'Left': 0.6786192059516

  'Page': 1,
  'Text': '5,544',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.66946411132812,
  'Geometry': {'BoundingBox': {'Height': 0.03954707086086273,
                               'Left': 0.7529454827308655,
                               'Top': 0.5465506911277771,
                               'Width': 0.017817281186580658},
               'Polygon': [{'X': 0.7531024813652039, 'Y': 0.5855061411857605},
                           {'X': 0.7529454827308655, 'Y': 0.5465506911277771},
                           {'X': 0.7705876231193542, 'Y': 0.5470738410949707},
                           {'X': 0.7707628011703491, 'Y': 0.586097776889801}]},
  'Id': 'c4ba2ec7-1617-44c4-bab0-ffeae97d1227',
  'Page': 1,
  'Text': '5,544',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 40.62601089477539,
  'Geometry': {'BoundingBox': {'Height': 0.03416015952825546,
                               'Left': 0.7562370300292969,
                               'Top': 0.

  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.45824432373047,
  'Geometry': {'BoundingBox': {'Height': 0.041165776550769806,
                               'Left': 0.8072310090065002,
                               'Top': 0.8246796131134033,
                               'Width': 0.01843102090060711},
               'Polygon': [{'X': 0.8074474334716797, 'Y': 0.864738941192627},
                           {'X': 0.8072310090065002, 'Y': 0.8246796131134033},
                           {'X': 0.8254263997077942, 'Y': 0.8257133364677429},
                           {'X': 0.8256620168685913, 'Y': 0.8658453822135925}]},
  'Id': 'dde189a9-b5de-4d48-a1d7-be5570f94e29',
  'Page': 1,
  'Text': '3,350',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 71.10503387451172,
  'Geometry': {'BoundingBox': {'Height': 0.0353824757039547,
                               'Left': 0.8099167346954346,
                               'Top': 0.7375410795211792,
               

  'Geometry': {'BoundingBox': {'Height': 0.006655437871813774,
                               'Left': 0.8785837888717651,
                               'Top': 0.8730438947677612,
                               'Width': 0.006137390621006489},
               'Polygon': [{'X': 0.878629207611084, 'Y': 0.8793231844902039},
                           {'X': 0.8785837888717651, 'Y': 0.8730438947677612},
                           {'X': 0.8846747279167175, 'Y': 0.8734162449836731},
                           {'X': 0.8847211599349976, 'Y': 0.8796992897987366}]},
  'Id': 'c8ad936a-00fa-4964-bae0-81f0fce9c591',
  'Page': 1,
  'Text': '-',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 96.70333862304688,
  'Geometry': {'BoundingBox': {'Height': 0.007740483619272709,
                               'Left': 0.8788577914237976,
                               'Top': 0.8601085543632507,
                               'Width': 0.00591654097661376},
               'Polygon': [{'X': 0.878

                           {'X': 0.11409012973308563, 'Y': 0.6027814149856567},
                           {'X': 0.16116668283939362, 'Y': 0.6045504212379456},
                           {'X': 0.1600765585899353, 'Y': 0.7006694078445435}]},
  'Id': '2fccee78-f8f2-4d90-afab-53d08c4952c5',
  'Page': 1,
  'Relationships': [{'Ids': ['44c2a1c1-b29d-415a-848b-f15b1adebbfb'],
                     'Type': 'CHILD'}],
  'RowIndex': 1,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 4,
  'ColumnSpan': 1,
  'Confidence': 83.60613250732422,
  'Geometry': {'BoundingBox': {'Height': 0.08530750125646591,
                               'Left': 0.11409012973308563,
                               'Top': 0.5192429423332214,
                               'Width': 0.048028744757175446},
               'Polygon': [{'X': 0.11409012973308563, 'Y': 0.6027814149856567},
                           {'X': 0.11513973772525787, 'Y': 0.5192429423332214},
                           {'X': 0.16211888194084167, 

  'ColumnSpan': 1,
  'Confidence': 74.13118743896484,
  'Geometry': {'BoundingBox': {'Height': 0.09956938028335571,
                               'Left': 0.16211886703968048,
                               'Top': 0.42536890506744385,
                               'Width': 0.15169955790042877},
               'Polygon': [{'X': 0.16211886703968048, 'Y': 0.5205945372581482},
                           {'X': 0.16319887340068817, 'Y': 0.42536890506744385},
                           {'X': 0.31381842494010925, 'Y': 0.4281979501247406},
                           {'X': 0.3131013810634613, 'Y': 0.5249382853507996}]},
  'Id': '4dc8a649-4d04-43a5-8d00-166e49c5e46f',
  'Page': 1,
  'Relationships': [{'Ids': ['f83433df-1824-429a-9e28-211f644ff7fe',
                             '0bab5e7b-14d3-460f-9828-ec6cd0cf50d4'],
                     'Type': 'CHILD'}],
  'RowIndex': 2,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 6,
  'ColumnSpan': 1,
  'Confidence': 94.55560302734375,
  'Geometr

 {'BlockType': 'CELL',
  'ColumnIndex': 9,
  'ColumnSpan': 1,
  'Confidence': 37.162479400634766,
  'Geometry': {'BoundingBox': {'Height': 0.10141512751579285,
                               'Left': 0.31584733724594116,
                               'Top': 0.05304628983139992,
                               'Width': 0.13632775843143463},
               'Polygon': [{'X': 0.31584733724594116, 'Y': 0.15446141362190247},
                           {'X': 0.31657925248146057, 'Y': 0.05571186542510986},
                           {'X': 0.452175110578537, 'Y': 0.05304628983139992},
                           {'X': 0.45179054141044617,
                            'Y': 0.15317413210868835}]},
  'Id': '8db21745-4902-4976-9b83-bfc3062b2c6d',
  'Page': 1,
  'Relationships': [{'Ids': ['ac59d687-86d8-4e43-81d7-e7640f215c10',
                             'dfc13613-49d3-4fd5-8dc5-2d205775bf1d',
                             'f6ca7a4a-d00a-4f53-b316-6488a9216705',
                             'ace2a3a8-

  'Geometry': {'BoundingBox': {'Height': 0.09429394453763962,
                               'Left': 0.5161912441253662,
                               'Top': 0.22388646006584167,
                               'Width': 0.06215675547719002},
               'Polygon': [{'X': 0.5161912441253662, 'Y': 0.3177313804626465},
                           {'X': 0.5163916945457458, 'Y': 0.22402550280094147},
                           {'X': 0.5783479809761047, 'Y': 0.22388646006584167},
                           {'X': 0.5782994627952576, 'Y': 0.3181804120540619}]},
  'Id': '59078c12-b2b6-49d6-89a0-f717c1ab6e18',
  'Page': 1,
  'Relationships': [{'Ids': ['dfda5a0c-2575-407e-bb8d-d1efbe57a773',
                             '992337bc-ff2e-4688-9826-68c55621f397'],
                     'Type': 'CHILD'}],
  'RowIndex': 5,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 8,
  'ColumnSpan': 1,
  'Confidence': 73.73344421386719,
  'Geometry': {'BoundingBox': {'Height': 0.07205016165971756,
     

                            'Y': 0.43609991669654846}]},
  'Id': 'b9c3812e-7fff-45c7-a900-a759a9266f90',
  'Page': 1,
  'Relationships': [{'Ids': ['84891415-8836-4b43-870e-19b5381c61b7'],
                     'Type': 'CHILD'}],
  'RowIndex': 8,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 7,
  'ColumnSpan': 1,
  'Confidence': 91.71382904052734,
  'Geometry': {'BoundingBox': {'Height': 0.09576918184757233,
                               'Left': 0.6544950604438782,
                               'Top': 0.22353766858577728,
                               'Width': 0.07961057126522064},
               'Polygon': [{'X': 0.6546359062194824, 'Y': 0.31873229146003723},
                           {'X': 0.6544950604438782, 'Y': 0.22371555864810944},
                           {'X': 0.7337646484375, 'Y': 0.22353766858577728},
                           {'X': 0.73410564661026, 'Y': 0.3193068504333496}]},
  'Id': 'ae386c99-17aa-4fdc-afe8-577c43fb5a17',
  'Page': 1,
  'Relationships': [{'

 {'BlockType': 'CELL',
  'ColumnIndex': 6,
  'ColumnSpan': 1,
  'Confidence': 49.61447525024414,
  'Geometry': {'BoundingBox': {'Height': 0.11844690889120102,
                               'Left': 0.7831838726997375,
                               'Top': 0.3196617066860199,
                               'Width': 0.058280616998672485},
               'Polygon': [{'X': 0.7837525010108948, 'Y': 0.43702462315559387},
                           {'X': 0.7831838726997375, 'Y': 0.3196617066860199},
                           {'X': 0.8407150506973267, 'Y': 0.3200776278972626},
                           {'X': 0.8414645195007324, 'Y': 0.4381085932254791}]},
  'Id': '2169a103-07d9-41e2-88cf-398a95d38bf0',
  'Page': 1,
  'Relationships': [{'Ids': ['b369ad5c-f167-4968-903e-548be5885321',
                             'ce055407-bfa3-4d2d-ac87-1ecb5bc2ba4b',
                             '1dc17216-2160-4b6f-9199-aea411b6308e',
                             '1d429c62-7229-4194-b34f-c68427b8e01c'],
    

                     'Type': 'CHILD'}],
  'RowIndex': 11,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 6,
  'ColumnSpan': 1,
  'Confidence': 36.259376525878906,
  'Geometry': {'BoundingBox': {'Height': 0.11897537857294083,
                               'Left': 0.8407150506973267,
                               'Top': 0.3200776278972626,
                               'Width': 0.051030147820711136},
               'Polygon': [{'X': 0.8414645195007324, 'Y': 0.4381085932254791},
                           {'X': 0.8407150506973267, 'Y': 0.3200776278972626},
                           {'X': 0.8908365368843079, 'Y': 0.32043999433517456},
                           {'X': 0.8917452096939087, 'Y': 0.4390530288219452}]},
  'Id': 'ba07fdec-3df9-4e30-8db9-a0841bb2862e',
  'Page': 1,
  'Relationships': [{'Ids': ['ad720931-6f70-4de0-bd12-95c3b2de0905'],
                     'Type': 'CHILD'}],
  'RowIndex': 11,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 7,
  'ColumnSpan': 1

                               'Width': 0.022343028336763382},
               'Polygon': [{'X': 0.33946558833122253, 'Y': 0.1316266804933548},
                           {'X': 0.3398890495300293, 'Y': 0.06931255757808685},
                           {'X': 0.3618085980415344, 'Y': 0.06891337782144547},
                           {'X': 0.36142024397850037,
                            'Y': 0.13136768341064453}]},
  'Id': '64a4db9b-f16b-47fa-941d-c07ec00366f4',
  'Page': 1,
  'Relationships': [{'Ids': ['a6ef4a24-34a5-41e2-9d94-84ec8cd538c6'],
                     'Type': 'VALUE'},
                    {'Ids': ['ac59d687-86d8-4e43-81d7-e7640f215c10',
                             'dfc13613-49d3-4fd5-8dc5-2d205775bf1d'],
                     'Type': 'CHILD'}]},
 {'BlockType': 'KEY_VALUE_SET',
  'Confidence': 23.5,
  'EntityTypes': ['VALUE'],
  'Geometry': {'BoundingBox': {'Height': 0.03840626776218414,
                               'Left': 0.35611018538475037,
                               '

  'Geometry': {'BoundingBox': {'Height': 0.023611128330230713,
                               'Left': 0.39165452122688293,
                               'Top': 0.20581872761249542,
                               'Width': 0.27324792742729187},
               'Polygon': [{'X': 0.39165452122688293, 'Y': 0.20581872761249542},
                           {'X': 0.6648184061050415, 'Y': 0.20588698983192444},
                           {'X': 0.6649024486541748, 'Y': 0.22942985594272614},
                           {'X': 0.3916659653186798,
                            'Y': 0.22929657995700836}]},
  'Id': 'da8df9e3-a679-4dba-b09b-7ecbfb172e21',
  'Page': 2,
  'Relationships': [{'Ids': ['0fcd9c83-a3a0-4ca5-83fe-1ff1daf778bb',
                             '0eb798d1-724d-46bf-956e-3e1f8894144e',
                             'd1b33f98-00d1-45d4-9b27-05a35f621f02'],
                     'Type': 'CHILD'}],
  'Text': 'IN VIRGINIA TOWNS'},
 {'BlockType': 'LINE',
  'Confidence': 59.44326400756836,
  'Geo

                           {'X': 0.38003864884376526,
                            'Y': 0.7208353281021118}]},
  'Id': '4262aea7-25c0-44ab-925e-7c865eaa4641',
  'Page': 2,
  'Text': '905',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.22794342041016,
  'Geometry': {'BoundingBox': {'Height': 0.01520554255694151,
                               'Left': 0.4306313991546631,
                               'Top': 0.7095955610275269,
                               'Width': 0.10196978598833084},
               'Polygon': [{'X': 0.4306313991546631, 'Y': 0.7095955610275269},
                           {'X': 0.5325708985328674, 'Y': 0.710141122341156},
                           {'X': 0.5326011776924133, 'Y': 0.7248011231422424},
                           {'X': 0.4306448698043823, 'Y': 0.7242403626441956}]},
  'Id': '48bfe0c1-aee9-41a0-be3c-423dfde175a6',
  'Page': 2,
  'Text': 'Travelers',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.75981140136719,
  

                             'addb7ed6-8b3d-4ab1-bb17-39046345305e',
                             '1ddcd53e-9735-498c-b53a-6a09eea90673',
                             'e11b96fb-b977-41ed-bc36-16dc32f76e8d',
                             'e878f0f9-ab6a-4ffe-9862-90dac00b3f02',
                             '34e85713-6a46-4c45-8f42-4f012113ce28',
                             '184566d9-81fc-45e1-a031-fe75beafa896',
                             'dec05cfa-051b-484d-9cf9-3a7392e805df',
                             'eb8bc5e0-8676-4ba0-9c72-15a8d0935079',
                             '49345b0b-3819-4722-af19-77ac135bb84b',
                             '5bde0df0-baab-442f-ac4a-7c76840e5dd2',
                             'fc9065a8-dd00-4e65-b77d-5bc9875e6a85',
                             '1abd7a13-8292-4dd8-ab53-101b210836cf',
                             '2955bc1f-ef2b-49c4-8ce0-3cfdec4f0954',
                             'ffb66cd9-583f-4597-9966-6bcac1c4b86b',
                             'ccd9

  'Relationships': [{'Ids': ['fda30a55-36ab-4be1-8c5b-3bebcfd452ae',
                             '12591dc4-d9f8-49a3-b773-5320b653c613'],
                     'Type': 'CHILD'}],
  'Text': 'Director of'},
 {'BlockType': 'LINE',
  'Confidence': 35.30671310424805,
  'Geometry': {'BoundingBox': {'Height': 0.04584633186459541,
                               'Left': 0.8119832277297974,
                               'Top': 0.04376270994544029,
                               'Width': 0.03195090591907501},
               'Polygon': [{'X': 0.8439341187477112, 'Y': 0.043912529945373535},
                           {'X': 0.8423367142677307, 'Y': 0.0896090418100357},
                           {'X': 0.8119832277297974, 'Y': 0.08948331326246262},
                           {'X': 0.8134624361991882,
                            'Y': 0.04376270994544029}]},
  'Id': '2e7ed4aa-8dd4-428a-ba69-2d6aa7f17224',
  'Page': 3,
  'Relationships': [{'Ids': ['447d468b-750b-4cd8-aafb-119dc8ba117b'],
              

  'Geometry': {'BoundingBox': {'Height': 0.02715253084897995,
                               'Left': 0.7432208061218262,
                               'Top': 0.7293190956115723,
                               'Width': 0.01917251944541931},
               'Polygon': [{'X': 0.7623933553695679, 'Y': 0.7293190956115723},
                           {'X': 0.7615898847579956, 'Y': 0.7563403248786926},
                           {'X': 0.7432208061218262, 'Y': 0.7564716339111328},
                           {'X': 0.7439793348312378, 'Y': 0.729442298412323}]},
  'Id': '1abd7a13-8292-4dd8-ab53-101b210836cf',
  'Page': 3,
  'Relationships': [{'Ids': ['a1a923aa-689a-4caa-a9e0-27d19044ee46'],
                     'Type': 'CHILD'}],
  'Text': 'Supt.'},
 {'BlockType': 'LINE',
  'Confidence': 89.59559631347656,
  'Geometry': {'BoundingBox': {'Height': 0.027350328862667084,
                               'Left': 0.7487079501152039,
                               'Top': 0.8776459693908691,
             

  'Confidence': 99.70026397705078,
  'Geometry': {'BoundingBox': {'Height': 0.05777714401483536,
                               'Left': 0.6711320877075195,
                               'Top': 0.6196836829185486,
                               'Width': 0.018622469156980515},
               'Polygon': [{'X': 0.6897545456886292, 'Y': 0.6196836829185486},
                           {'X': 0.6884328722953796, 'Y': 0.6773604154586792},
                           {'X': 0.6711320877075195, 'Y': 0.6774608492851257},
                           {'X': 0.6723640561103821, 'Y': 0.6197676658630371}]},
  'Id': 'b5fffe88-50b7-4e7f-90a2-23ee62645541',
  'Page': 3,
  'Relationships': [{'Ids': ['d5f30379-133d-4fe6-be11-94709fdf258a'],
                     'Type': 'CHILD'}],
  'Text': 'Engineer'},
 {'BlockType': 'LINE',
  'Confidence': 98.17134094238281,
  'Geometry': {'BoundingBox': {'Height': 0.031076885759830475,
                               'Left': 0.6689557433128357,
                               

 {'BlockType': 'LINE',
  'Confidence': 79.32984161376953,
  'Geometry': {'BoundingBox': {'Height': 0.30378758907318115,
                               'Left': 0.5865647792816162,
                               'Top': 0.6178398728370667,
                               'Width': 0.026188943535089493},
               'Polygon': [{'X': 0.6127537488937378, 'Y': 0.6178398728370667},
                           {'X': 0.6078976392745972, 'Y': 0.9214159250259399},
                           {'X': 0.5865647792816162, 'Y': 0.9216274619102478},
                           {'X': 0.5908244848251343, 'Y': 0.6179449558258057}]},
  'Id': '9e109a4e-6649-4c1a-9d51-2e22de35515e',
  'Page': 3,
  'Relationships': [{'Ids': ['b7516448-6240-42f8-919e-39f1660be87d',
                             '374b6841-159e-4f92-a007-082686a7eb37',
                             '90229310-1d3f-4808-aefd-9d44ed91ccf0',
                             '639946bf-33fc-44ed-b089-c092c9decb4b',
                             '1bac2a29-c2bb-4

  'Relationships': [{'Ids': ['bf87af08-ce52-4bd1-8487-f3518f57635d'],
                     'Type': 'CHILD'}],
  'Text': '9,720'},
 {'BlockType': 'LINE',
  'Confidence': 98.01242065429688,
  'Geometry': {'BoundingBox': {'Height': 0.03222941234707832,
                               'Left': 0.5001925230026245,
                               'Top': 0.7266205549240112,
                               'Width': 0.01543458178639412},
               'Polygon': [{'X': 0.5156270861625671, 'Y': 0.7266205549240112},
                           {'X': 0.5153911709785461, 'Y': 0.7587411999702454},
                           {'X': 0.5001925230026245, 'Y': 0.7588499784469604},
                           {'X': 0.5003840923309326, 'Y': 0.726721465587616}]},
  'Id': '0e3a30c9-b937-471a-ac21-3fd5917b3e88',
  'Page': 3,
  'Relationships': [{'Ids': ['2d052da3-98dd-4c6f-913d-bd2d1ac37da4'],
                     'Type': 'CHILD'}],
  'Text': '1,440'},
 {'BlockType': 'LINE',
  'Confidence': 98.03055572509766,
  'Ge

  'Geometry': {'BoundingBox': {'Height': 0.03672543168067932,
                               'Left': 0.4264390766620636,
                               'Top': 0.526024341583252,
                               'Width': 0.015764253214001656},
               'Polygon': [{'X': 0.4422033131122589, 'Y': 0.526024341583252},
                           {'X': 0.4421786367893219, 'Y': 0.5626892447471619},
                           {'X': 0.42646586894989014, 'Y': 0.5627497434616089},
                           {'X': 0.4264390766620636, 'Y': 0.5260753035545349}]},
  'Id': '359cd378-3797-41f5-a13c-44ecc98deb2c',
  'Page': 3,
  'Relationships': [{'Ids': ['63ac5635-6bc3-4e4d-a7d8-884a53d591d3'],
                     'Type': 'CHILD'}],
  'Text': '9,569'},
 {'BlockType': 'LINE',
  'Confidence': 99.820068359375,
  'Geometry': {'BoundingBox': {'Height': 0.04205634072422981,
                               'Left': 0.42589566111564636,
                               'Top': 0.6151586174964905,
              

  'Geometry': {'BoundingBox': {'Height': 0.02614598721265793,
                               'Left': 0.29380911588668823,
                               'Top': 0.021176598966121674,
                               'Width': 0.01624404825270176},
               'Polygon': [{'X': 0.30977576971054077,
                            'Y': 0.021260472014546394},
                           {'X': 0.31005316972732544, 'Y': 0.0473225861787796},
                           {'X': 0.2941220998764038, 'Y': 0.047245871275663376},
                           {'X': 0.29380911588668823,
                            'Y': 0.021176598966121674}]},
  'Id': '39258ee7-2552-41b9-9708-c9859c3f85e2',
  'Page': 3,
  'Relationships': [{'Ids': ['57984e03-0dfe-4ef9-b866-a8c1eb311e35'],
                     'Type': 'CHILD'}],
  'Text': 'orton'},
 {'BlockType': 'LINE',
  'Confidence': 89.97152709960938,
  'Geometry': {'BoundingBox': {'Height': 0.020853208377957344,
                               'Left': 0.2939181625843048,
  

               'Polygon': [{'X': 0.25430747866630554, 'Y': 0.6183878779411316},
                           {'X': 0.25489601492881775, 'Y': 0.6547254920005798},
                           {'X': 0.23973959684371948, 'Y': 0.6548070907592773},
                           {'X': 0.23910124599933624, 'Y': 0.618460476398468}]},
  'Id': '835b4e36-a46b-4c87-839b-3efd0ff8377c',
  'Page': 3,
  'Relationships': [{'Ids': ['49cb5c73-809f-4af4-978c-7964708c7ad9'],
                     'Type': 'CHILD'}],
  'Text': '9,700'},
 {'BlockType': 'LINE',
  'Confidence': 99.59451293945312,
  'Geometry': {'BoundingBox': {'Height': 0.03492042422294617,
                               'Left': 0.2407788634300232,
                               'Top': 0.7208787798881531,
                               'Width': 0.015423793345689774},
               'Polygon': [{'X': 0.2556377053260803, 'Y': 0.7208787798881531},
                           {'X': 0.25620266795158386, 'Y': 0.7556943893432617},
                           {'

                            'Y': 0.6121227741241455}]},
  'Id': '2e0cbbe2-161d-422a-863d-623820c8e6aa',
  'Page': 3,
  'Relationships': [{'Ids': ['8d98541f-aa80-4632-8f4c-389cad4a55e1'],
                     'Type': 'CHILD'}],
  'Text': '11,400'},
 {'BlockType': 'LINE',
  'Confidence': 99.48336791992188,
  'Geometry': {'BoundingBox': {'Height': 0.03462273254990578,
                               'Left': 0.1659703403711319,
                               'Top': 0.7203466296195984,
                               'Width': 0.016440095379948616},
               'Polygon': [{'X': 0.18161888420581818, 'Y': 0.7203466296195984},
                           {'X': 0.18241043388843536, 'Y': 0.7548593282699585},
                           {'X': 0.16681106388568878, 'Y': 0.7549693584442139},
                           {'X': 0.1659703403711319, 'Y': 0.7204479575157166}]},
  'Id': '7f09cc9b-b85f-4a2f-a688-da091c300f52',
  'Page': 3,
  'Relationships': [{'Ids': ['8572379c-b3ee-4fd2-a9d7-faa014581e35'],


                               'Top': 0.019536148756742477,
                               'Width': 0.055271510034799576},
               'Polygon': [{'X': 0.08614727109670639,
                            'Y': 0.019823897629976273},
                           {'X': 0.08668731898069382,
                            'Y': 0.038005515933036804},
                           {'X': 0.032041244208812714,
                            'Y': 0.03773484006524086},
                           {'X': 0.03141580894589424,
                            'Y': 0.019536148756742477}]},
  'Id': 'ffa30a79-d51f-4427-9d7a-c9fe3db1f528',
  'Page': 3,
  'Relationships': [{'Ids': ['1abbbb10-9938-4ad8-8d0c-cfe1566072c5'],
                     'Type': 'CHILD'}],
  'Text': 'I'},
 {'BlockType': 'WORD',
  'Confidence': 96.62971496582031,
  'Geometry': {'BoundingBox': {'Height': 0.021215718239545822,
                               'Left': 0.8812052607536316,
                               'Top': 0.4207614064216614,
          

  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.84558868408203,
  'Geometry': {'BoundingBox': {'Height': 0.05592941865324974,
                               'Left': 0.8224706053733826,
                               'Top': 0.3338618278503418,
                               'Width': 0.016877969726920128},
               'Polygon': [{'X': 0.8393486142158508, 'Y': 0.3338618874549866},
                           {'X': 0.8373668193817139, 'Y': 0.38977718353271484},
                           {'X': 0.8224706053733826, 'Y': 0.38979125022888184},
                           {'X': 0.824379563331604, 'Y': 0.3338618278503418}]},
  'Id': 'ae57e7a3-f9cf-4f36-b0a8-5f8df65eeeef',
  'Page': 3,
  'Text': 'Revenue',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.0541000366211,
  'Geometry': {'BoundingBox': {'Height': 0.045875731855630875,
                               'Left': 0.8175696134567261,
                               'Top': 0.7245480418205261,
          

 {'BlockType': 'WORD',
  'Confidence': 99.81086730957031,
  'Geometry': {'BoundingBox': {'Height': 0.007919706404209137,
                               'Left': 0.7847772836685181,
                               'Top': 0.3259139657020569,
                               'Width': 0.01576838083565235},
               'Polygon': [{'X': 0.8005456924438477, 'Y': 0.3259161114692688},
                           {'X': 0.8002921342849731, 'Y': 0.3338336944580078},
                           {'X': 0.7847772836685181, 'Y': 0.33383363485336304},
                           {'X': 0.7850201725959778, 'Y': 0.3259139657020569}]},
  'Id': '9d07ef52-87e8-48b2-9227-1c4f169a2042',
  'Page': 3,
  'Text': '$',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 97.8371353149414,
  'Geometry': {'BoundingBox': {'Height': 0.03884921595454216,
                               'Left': 0.7816720008850098,
                               'Top': 0.3410381078720093,
                               'Width': 0.0

                               'Width': 0.025713199749588966},
               'Polygon': [{'X': 0.724976658821106, 'Y': 0.7151414752006531},
                           {'X': 0.722854495048523, 'Y': 0.7958167791366577},
                           {'X': 0.6992634534835815, 'Y': 0.7960010766983032},
                           {'X': 0.7012127041816711, 'Y': 0.7152946591377258}]},
  'Id': 'e498b1b2-8625-422d-b174-298c694565f0',
  'Page': 3,
  'Text': 'Sergeant-$624).',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 96.93507385253906,
  'Geometry': {'BoundingBox': {'Height': 0.05304734408855438,
                               'Left': 0.6961007714271545,
                               'Top': 0.7961373925209045,
                               'Width': 0.01816251501441002},
               'Polygon': [{'X': 0.7142632603645325, 'Y': 0.7961373925209045},
                           {'X': 0.7129131555557251, 'Y': 0.849038302898407},
                           {'X': 0.69610077142715

                               'Width': 0.02211257629096508},
               'Polygon': [{'X': 0.662506103515625, 'Y': 0.7884348630905151},
                           {'X': 0.6599317193031311, 'Y': 0.9123064875602722},
                           {'X': 0.6403935551643372, 'Y': 0.9124974012374878},
                           {'X': 0.6427454948425293, 'Y': 0.7885866165161133}]},
  'Id': '18bab21e-40f8-43fe-8514-7c089538cbb7',
  'Page': 3,
  'Text': 'Ciactor-$6864-$8190',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 89.28902435302734,
  'Geometry': {'BoundingBox': {'Height': 0.005048976745456457,
                               'Left': 0.6485926508903503,
                               'Top': 0.8338466882705688,
                               'Width': 0.004375962540507317},
               'Polygon': [{'X': 0.6529685854911804, 'Y': 0.8338466882705688},
                           {'X': 0.652868390083313, 'Y': 0.8388592004776001},
                           {'X': 0.64859265

  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 98.0047607421875,
  'Geometry': {'BoundingBox': {'Height': 0.05089588835835457,
                               'Left': 0.609772264957428,
                               'Top': 0.845099925994873,
                               'Width': 0.014298089779913425},
               'Polygon': [{'X': 0.6240703463554382, 'Y': 0.845099925994873},
                           {'X': 0.6231886148452759, 'Y': 0.8958684802055359},
                           {'X': 0.609772264957428, 'Y': 0.8959957957267761},
                           {'X': 0.6105914115905762, 'Y': 0.8452162146568298}]},
  'Id': '119293fc-5ebc-4888-9552-a5477a60db02',
  'Page': 3,
  'Text': 'Service-',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 52.444679260253906,
  'Geometry': {'BoundingBox': {'Height': 0.03270436078310013,
                               'Left': 0.5899099111557007,
                               'Top': 0.4694048762321472,
              

                               'Top': 0.6227233409881592,
                               'Width': 0.015666402876377106},
               'Polygon': [{'X': 0.5172389149665833, 'Y': 0.6227233409881592},
                           {'X': 0.5169728398323059, 'Y': 0.658572256565094},
                           {'X': 0.5015724897384644, 'Y': 0.6586564779281616},
                           {'X': 0.501788854598999, 'Y': 0.622798502445221}]},
  'Id': 'bf87af08-ce52-4bd1-8487-f3518f57635d',
  'Page': 3,
  'Text': '9,720',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 98.01242065429688,
  'Geometry': {'BoundingBox': {'Height': 0.03222941234707832,
                               'Left': 0.5001925230026245,
                               'Top': 0.7266205549240112,
                               'Width': 0.01543455570936203},
               'Polygon': [{'X': 0.5156270861625671, 'Y': 0.7266205549240112},
                           {'X': 0.5153911709785461, 'Y': 0.7587411999702454},
 

 {'BlockType': 'WORD',
  'Confidence': 99.772705078125,
  'Geometry': {'BoundingBox': {'Height': 0.03623160719871521,
                               'Left': 0.42684850096702576,
                               'Top': 0.41946423053741455,
                               'Width': 0.01599842496216297},
               'Polygon': [{'X': 0.4428469240665436, 'Y': 0.41946423053741455},
                           {'X': 0.4428207278251648, 'Y': 0.45566317439079285},
                           {'X': 0.42687341570854187, 'Y': 0.45569583773612976},
                           {'X': 0.42684850096702576,
                            'Y': 0.41948723793029785}]},
  'Id': 'be4c2c57-e5a3-4cfc-a53b-bdcb40d9c229',
  'Page': 3,
  'Text': '6,947',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 99.37035369873047,
  'Geometry': {'BoundingBox': {'Height': 0.03672542795538902,
                               'Left': 0.4264390766620636,
                               'Top': 0.526024341583252,
       

                           {'X': 0.31005316972732544, 'Y': 0.0473225861787796},
                           {'X': 0.2941220700740814, 'Y': 0.047245871275663376},
                           {'X': 0.29380911588668823,
                            'Y': 0.021176597103476524}]},
  'Id': '57984e03-0dfe-4ef9-b866-a8c1eb311e35',
  'Page': 3,
  'Text': 'orton',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 89.97152709960938,
  'Geometry': {'BoundingBox': {'Height': 0.020853212103247643,
                               'Left': 0.2939181923866272,
                               'Top': 0.09413865953683853,
                               'Width': 0.005557855125516653},
               'Polygon': [{'X': 0.2992340922355652, 'Y': 0.0941600650548935},
                           {'X': 0.2994760274887085, 'Y': 0.11499186605215073},
                           {'X': 0.29416966438293457, 'Y': 0.11497235298156738},
                           {'X': 0.2939181923866272,
                          

  'Geometry': {'BoundingBox': {'Height': 0.06027591601014137,
                               'Left': 0.25156641006469727,
                               'Top': 0.8943060636520386,
                               'Width': 0.01570531725883484},
               'Polygon': [{'X': 0.26633983850479126, 'Y': 0.8943060636520386},
                           {'X': 0.2672717273235321, 'Y': 0.954429030418396},
                           {'X': 0.2525803744792938, 'Y': 0.9545819759368896},
                           {'X': 0.25156641006469727, 'Y': 0.89444500207901}]},
  'Id': '45eb5d66-39e6-4fa8-bbad-3e400dacd6bd',
  'Page': 3,
  'Text': 'Assessor-',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 98.50048065185547,
  'Geometry': {'BoundingBox': {'Height': 0.060489311814308167,
                               'Left': 0.21997256577014923,
                               'Top': 0.6976193189620972,
                               'Width': 0.016893967986106873},
               'Polygon': [{'

               'Polygon': [{'X': 0.14439430832862854, 'Y': 0.7196336388587952},
                           {'X': 0.14532004296779633, 'Y': 0.7548223733901978},
                           {'X': 0.12970998883247375, 'Y': 0.7549324035644531},
                           {'X': 0.1287340521812439, 'Y': 0.719734787940979}]},
  'Id': '4d9366a6-7098-491b-8122-e8ce53d45f0e',
  'Page': 3,
  'Text': '6,000',
  'TextType': 'PRINTED'},
 {'BlockType': 'WORD',
  'Confidence': 89.15422821044922,
  'Geometry': {'BoundingBox': {'Height': 0.016658103093504906,
                               'Left': 0.13711020350456238,
                               'Top': 0.7938964366912842,
                               'Width': 0.007553269155323505},
               'Polygon': [{'X': 0.14422351121902466, 'Y': 0.7938964366912842},
                           {'X': 0.1446634829044342, 'Y': 0.8104978799819946},
                           {'X': 0.13756100833415985, 'Y': 0.810554563999176},
                           {'X': 0

  'Page': 3,
  'Relationships': [{'Ids': ['9d07ef52-87e8-48b2-9227-1c4f169a2042',
                             'a15e5baf-c8ab-4bae-be32-0d5d0fb9d952'],
                     'Type': 'CHILD'}],
  'RowIndex': 2,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 6,
  'ColumnSpan': 1,
  'Confidence': 88.27423858642578,
  'Geometry': {'BoundingBox': {'Height': 0.10368719696998596,
                               'Left': 0.7409704327583313,
                               'Top': 0.3930966258049011,
                               'Width': 0.07837008684873581},
               'Polygon': [{'X': 0.8193405270576477, 'Y': 0.3930966258049011},
                           {'X': 0.8158360123634338, 'Y': 0.496577650308609},
                           {'X': 0.7409704327583313, 'Y': 0.4967838227748871},
                           {'X': 0.7437902688980103, 'Y': 0.3931722640991211}]},
  'Id': 'f711e05e-65bc-4691-82ee-00aaa64b9178',
  'Page': 3,
  'Relationships': [{'Ids': ['c7e2672f-f6d5-42b1-b32b-c427

                           {'X': 0.6410024166107178,
                            'Y': 0.02466421388089657}]},
  'Id': 'bccbc393-c81b-4479-b4c6-fb3a0f6ad731',
  'Page': 3,
  'Relationships': [{'Ids': ['0c57f70c-d1ad-4102-8c51-a05b0735c146'],
                     'Type': 'CHILD'}],
  'RowIndex': 4,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 2,
  'ColumnSpan': 1,
  'Confidence': 97.79866790771484,
  'Geometry': {'BoundingBox': {'Height': 0.06523030996322632,
                               'Left': 0.6387025713920593,
                               'Top': 0.09038253128528595,
                               'Width': 0.0826011449098587},
               'Polygon': [{'X': 0.7213037014007568, 'Y': 0.09071771800518036},
                           {'X': 0.7197085022926331, 'Y': 0.15561284124851227},
                           {'X': 0.6387025713920593, 'Y': 0.15536843240261078},
                           {'X': 0.6398460865020752,
                            'Y': 0.09038253128528595}]

                            'Y': 0.23695571720600128}]},
  'Id': 'aa381b6c-6771-46f7-8015-30bbc2d093ed',
  'Page': 3,
  'Relationships': [{'Ids': ['d2e90f9b-6f1c-47ad-9d24-dedc00c76fe5'],
                     'Type': 'CHILD'}],
  'RowIndex': 6,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 5,
  'ColumnSpan': 1,
  'Confidence': 93.73805236816406,
  'Geometry': {'BoundingBox': {'Height': 0.07553968578577042,
                               'Left': 0.46451830863952637,
                               'Top': 0.31791216135025024,
                               'Width': 0.06627000123262405},
               'Polygon': [{'X': 0.5307883024215698, 'Y': 0.31793004274368286},
                           {'X': 0.5301538705825806, 'Y': 0.3933861255645752},
                           {'X': 0.46451830863952637, 'Y': 0.39345183968544006},
                           {'X': 0.46471700072288513,
                            'Y': 0.31791216135025024}]},
  'Id': 'c6b181bb-7df7-419d-9f76-c725e8a2a799',

                            'Y': 0.7045922875404358}]},
  'Id': '8ce56838-50fb-49f0-b36e-66d8f376902d',
  'Page': 3,
  'Relationships': [{'Ids': ['69d2a715-c068-4c2c-8fb1-651f59a2a118'],
                     'Type': 'CHILD'}],
  'RowIndex': 7,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 10,
  'ColumnSpan': 1,
  'Confidence': 74.48902130126953,
  'Geometry': {'BoundingBox': {'Height': 0.1758146435022354,
                               'Left': 0.3965314030647278,
                               'Top': 0.7827142477035522,
                               'Width': 0.0669630616903305},
               'Polygon': [{'X': 0.4634944796562195, 'Y': 0.7827142477035522},
                           {'X': 0.46303388476371765, 'Y': 0.9578366875648499},
                           {'X': 0.3971405029296875, 'Y': 0.9585288763046265},
                           {'X': 0.3965314030647278, 'Y': 0.7832202315330505}]},
  'Id': '14a8cc17-29d8-441a-83f2-aeaa4347dbb2',
  'Page': 3,
  'Relationships': [{'

  'Id': '680aad97-8c7a-4595-bd63-6b787870af98',
  'Page': 3,
  'Relationships': [{'Ids': ['0923ba77-77c4-42a4-8b29-4ff1118d2de7'],
                     'Type': 'CHILD'}],
  'RowIndex': 9,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 8,
  'ColumnSpan': 1,
  'Confidence': 75.3257064819336,
  'Geometry': {'BoundingBox': {'Height': 0.10063479095697403,
                               'Left': 0.32594674825668335,
                               'Top': 0.6043897271156311,
                               'Width': 0.02699829451739788},
               'Polygon': [{'X': 0.35220202803611755, 'Y': 0.6043897271156311},
                           {'X': 0.3529450297355652, 'Y': 0.7048623561859131},
                           {'X': 0.3269267976284027, 'Y': 0.7050245404243469},
                           {'X': 0.32594674825668335,
                            'Y': 0.6045090556144714}]},
  'Id': '65a3330a-5a26-428f-8a45-aa6317526d2f',
  'Page': 3,
  'Relationships': [{'Ids': ['e1331fd6-ecbe-4ccf

  'Relationships': [{'Ids': ['26407914-284a-43dc-b91a-0bed46773762'],
                     'Type': 'CHILD'}],
  'RowIndex': 12,
  'RowSpan': 1},
 {'BlockType': 'CELL',
  'ColumnIndex': 5,
  'ColumnSpan': 1,
  'Confidence': 91.05438995361328,
  'Geometry': {'BoundingBox': {'Height': 0.07594912499189377,
                               'Left': 0.1411776840686798,
                               'Top': 0.31782445311546326,
                               'Width': 0.05396381765604019},
               'Polygon': [{'X': 0.1935414969921112, 'Y': 0.3178386688232422},
                           {'X': 0.1951414942741394, 'Y': 0.3937215209007263},
                           {'X': 0.14312687516212463, 'Y': 0.3937735855579376},
                           {'X': 0.1411776840686798,
                            'Y': 0.31782445311546326}]},
  'Id': 'dc4fa9a9-a2e3-4ced-9a5a-93fdbcbd055f',
  'Page': 3,
  'Relationships': [{'Ids': ['a10eb30c-bfb4-4842-8db9-597ee2f37091'],
                     'Type': 'CHILD'}

                            'Y': 0.10088729858398438}]},
  'Id': 'f5acbb3f-daab-4770-864e-38e974663294',
  'Page': 3,
  'Relationships': [{'Ids': ['82fde7db-6277-43e6-8a72-076c4461d69d'],
                     'Type': 'VALUE'},
                    {'Ids': ['26d20533-35b5-4fb4-868c-dd133cbeb7df',
                             '26776216-6af1-4ce5-bda6-0fc143250c2a',
                             '4092118f-bde4-49de-ba14-77a5abaddb31'],
                     'Type': 'CHILD'}]},
 {'BlockType': 'KEY_VALUE_SET',
  'Confidence': 25.5,
  'EntityTypes': ['VALUE'],
  'Geometry': {'BoundingBox': {'Height': 0.06110827252268791,
                               'Left': 0.7939905524253845,
                               'Top': 0.1008758693933487,
                               'Width': 0.030720386654138565},
               'Polygon': [{'X': 0.8247109055519104, 'Y': 0.1009896844625473},
                           {'X': 0.8226689100265503, 'Y': 0.1619841456413269},
                           {'X': 0.7939905

                     'Type': 'VALUE'},
                    {'Ids': ['40f6174a-f552-4948-ac2f-88a58fb46933',
                             'c34aff66-4059-435e-a86a-327a1e945165'],
                     'Type': 'CHILD'}]},
 {'BlockType': 'KEY_VALUE_SET',
  'Confidence': 13.500000953674316,
  'EntityTypes': ['VALUE'],
  'Geometry': {'BoundingBox': {'Height': 0.04278085008263588,
                               'Left': 0.5514640212059021,
                               'Top': 0.8057094812393188,
                               'Width': 0.01692214049398899},
               'Polygon': [{'X': 0.5683861970901489, 'Y': 0.8057094812393188},
                           {'X': 0.567865252494812, 'Y': 0.8483480215072632},
                           {'X': 0.5514640212059021, 'Y': 0.848490297794342},
                           {'X': 0.5519209504127502, 'Y': 0.8058404922485352}]},
  'Id': '9f98e870-d441-4214-ba6d-bb7d2e2861b6',
  'Page': 3},
 {'BlockType': 'PAGE',
  'Geometry': {'BoundingBox': {'Height': 0.

                           {'X': 0.1412220150232315, 'Y': 0.9209259748458862}]},
  'Id': '5cdf1b1d-82a9-4e6c-9c6f-b53e6c6b157f',
  'Page': 4,
  'Relationships': [{'Ids': ['82fff327-f48b-4431-bb74-e33c7de1a676'],
                     'Type': 'CHILD'}],
  'Text': 'Municipality'},
 {'BlockType': 'LINE',
  'Confidence': 99.75315856933594,
  'Geometry': {'BoundingBox': {'Height': 0.04436861351132393,
                               'Left': 0.1161215677857399,
                               'Top': 0.74027019739151,
                               'Width': 0.01605140045285225},
               'Polygon': [{'X': 0.11842702329158783, 'Y': 0.7844703197479248},
                           {'X': 0.1161215677857399, 'Y': 0.74027019739151},
                           {'X': 0.12989473342895508, 'Y': 0.7404105067253113},
                           {'X': 0.13217297196388245,
                            'Y': 0.7846388220787048}]},
  'Id': '6ee9411f-b668-4630-bcaa-d140d7ccb9c6',
  'Page': 4,
  'Relationships

In [67]:
output_file = 'output-part1.csv'

# replace content
with open(output_file, "wt") as fout:
    fout.write(results_csv)

# show the results
print('CSV OUTPUT FILE: ', output_file)

CSV OUTPUT FILE:  output-part1.csv


In [68]:
results_csv

"Table: Table_1\n\nComptroller ,Auditor ,Treasurer ,Comm. of Revenue ,Director of Public Welfare ,Director of Public Health ,Director of Public ,Dinctor of ,,\nsignity $ None dris ,$ None ,$ 8,650 3.000 ,$ 8,800 3,000 ,$ 5,400 ,$11,287.50 Neme ,Works $10,750 - ,$1400 -$7, ,Real ,\nagtion ,--- ,9,000 7.000 ,9,000 ,6,930 ,9,750 7.300 ,10,600 Director- $8,450 ments-$7,400; - Director of Public ,1450 Drector Dinctor of Inspections - Safety-$9.40. ,Finance Director. Real Estate Assess- $8,450; Personne) Libraries-$7,50 $8,450; Director t6 ,\n- - ,Clerk ,3,600 City's 2/3 ,3,814 City's 2/3 ,4,410 ,--- Public Wellare ,4,500 ,,,\nluma Vista ... ,--- ,3,432 ,3,000 ,3,000 ,- - ,3,060 Supt. ,,Supt. Water 1 Sewer-$3,060, ,\n... ,--- ,6,800 ,5,500 ,4,500 ,9,500 ,... ,4,120 ,Director of Finance & Clerk of Council- ,\n,,9.900 ,9,900 ,10,408 ,,$6,500; ,Plannisg.bgineer-$6,000. ,- ,\n13.440 Parille --- ,9.360 --- ,2,533 City's 1/3 ,5,850 City's 2/3 ,4,800 ,9,000 ,8,200 ,1,00 ,Director of Finance & Clerk

In [73]:
analyzePDF_results.keys()

dict_keys(['DocumentMetadata', 'JobStatus', 'NextToken', 'Blocks', 'AnalyzeDocumentModelVersion', 'ResponseMetadata'])

In [78]:
analyzePDF_results["NextToken"]

'mZEttZNZj0yBDvf+PqnVRkH7IbjaezVAZMfzAZQtU1GhViF/r05C7Rub2RKYi1B49xM7C52FCzWv0BMo0sJYerpAkSpMFaPvXsllJ/Xdyi55HfZ1ng=='

In [77]:
cnt=0
for block in analyzePDF_results['Blocks']:
    if block['BlockType']=="TABLE":
        cnt += 1
        print(cnt)

1
2


In [79]:
analyzePDF.keys()

dict_keys(['JobId', 'ResponseMetadata'])

In [83]:
def GetResults(jobId):
    maxResults = 1000
    paginationToken = None
    finished = False
    while finished == False:
        response = None
#         if self.processType == ProcessType.ANALYSIS:
        if paginationToken == None:
#             response = self.textract.get_document_analysis(JobId=jobId, MaxResults=maxResults)
            response = textract.get_document_analysis(JobId=analyzePDF['JobId'])
        else:
#             response = self.textract.get_document_analysis(JobId=jobId, MaxResults=maxResults, NextToken=paginationToken)
            response = textract.get_document_analysis(JobId=analyzePDF['JobId'], NextToken=paginationToken)
#         if self.processType == ProcessType.DETECTION:
#            if paginationToken == None:
#                response = self.textract.get_document_text_detection(JobId=jobId, MaxResults=maxResults)
#            else:
#                response = self.textract.get_document_text_detection(JobId=jobId,MaxResults=maxResults, NextToken=paginationToken)
        blocks = response['Blocks']
        print('Detected Document Text')
        print('Pages: {}'.format(response['DocumentMetadata']['Pages']))
        # Display block information
#         for block in blocks:
#            self.DisplayBlockInfo(block)
#            print()
#            print()
        if 'NextToken' in response:
           paginationToken = response['NextToken']
        else:
            finished = True
            return response


In [86]:
analyzePDF_results.keys()

dict_keys(['DocumentMetadata', 'JobStatus', 'NextToken', 'Blocks', 'AnalyzeDocumentModelVersion', 'ResponseMetadata'])

In [87]:
analyzePDF_results['Blocks']

[{'BlockType': 'PAGE',
  'Geometry': {'BoundingBox': {'Width': 0.988946795463562,
    'Height': 0.976067066192627,
    'Left': 0.005862809717655182,
    'Top': 0.005486753303557634},
   'Polygon': [{'X': 0.005862809717655182, 'Y': 0.9114241003990173},
    {'X': 0.01933825947344303, 'Y': 0.02792367897927761},
    {'X': 0.9848500490188599, 'Y': 0.005486753303557634},
    {'X': 0.994809627532959, 'Y': 0.9815537929534912}]},
  'Id': 'e58e7036-87a4-4d67-b20e-f75890b507cc',
  'Relationships': [{'Type': 'CHILD',
    'Ids': ['8db5b508-4482-49fd-96de-32fbbe36a816',
     '348db637-ab12-4a38-a98f-02c10cdf35e0',
     '4bbb078a-94c6-4c46-95f0-613411a76ab3',
     '0e23d71e-6d51-4025-b703-3a43c67d34c5',
     '6ea0357f-7eb3-49c1-a6f2-9fca0219f529',
     'f9525d88-71d1-403a-a240-9e97951b41a6',
     '9606cc0d-3245-4bba-8f2e-2673f296adbb',
     '3fa91a11-d810-442f-9ca7-ae3b3c905baa',
     '33d31560-b5af-4147-9963-08ecb73ee89f',
     '21efb64f-063b-4ff7-bdda-7643b0da1276',
     '4f6efa44-bc22-46ce-9475-2f

In [91]:
analyzePDF_results = textract.get_document_analysis(JobId=analyzePDF['JobId'])
paginationToken= analyzePDF_results['NextToken']
analyzePDF_results2 = textract.get_document_analysis(JobId=analyzePDF['JobId'], NextToken=paginationToken)

In [96]:
analyzePDF_results2.keys()

dict_keys(['DocumentMetadata', 'JobStatus', 'NextToken', 'Blocks', 'AnalyzeDocumentModelVersion', 'ResponseMetadata'])

In [97]:
analyzePDF_results2['ResponseMetadata']

{'RequestId': '000945fe-4454-49ee-809a-61289c1e4ec7',
 'HTTPStatusCode': 200,
 'HTTPHeaders': {'x-amzn-requestid': '000945fe-4454-49ee-809a-61289c1e4ec7',
  'content-type': 'application/x-amz-json-1.1',
  'content-length': '543151',
  'date': 'Mon, 13 Sep 2021 17:21:30 GMT'},
 'RetryAttempts': 0}

In [95]:
print(analyzePDF_results['DocumentMetadata'], 
      analyzePDF_results2['DocumentMetadata'])

{'Pages': 241} {'Pages': 241}


In [92]:
analyzePDF_results2

{'DocumentMetadata': {'Pages': 241},
 'JobStatus': 'SUCCEEDED',
 'NextToken': 'mZEttZNZj0yBDvf+PqnVRkH7IbjaezVAZMfzAZQtU1GhViF/r05C7Rub2RKYi1B49xM7C52FCzWv0BMo0sJYerpAkSpMFaPoXs1jLfDVzSx5HvZ1ng==',
 'Blocks': [{'BlockType': 'LINE',
   'Confidence': 71.55050659179688,
   'Text': '---',
   'Geometry': {'BoundingBox': {'Width': 0.0057271323166787624,
     'Height': 0.022356553003191948,
     'Left': 0.21548856794834137,
     'Top': 0.7664579153060913},
    'Polygon': [{'X': 0.21654854714870453, 'Y': 0.7887566089630127},
     {'X': 0.21548856794834137, 'Y': 0.7664579153060913},
     {'X': 0.22016039490699768, 'Y': 0.7665110230445862},
     {'X': 0.22121569514274597, 'Y': 0.7888144850730896}]},
   'Id': '541e543a-1c5c-4ab1-a2a8-1cd86be4a66d',
   'Relationships': [{'Type': 'CHILD',
     'Ids': ['1bb4e0be-7a46-46de-b7d8-c1b70cd99cf0']}],
   'Page': 4},
  {'BlockType': 'LINE',
   'Confidence': 87.39837646484375,
   'Text': '---',
   'Geometry': {'BoundingBox': {'Width': 0.0057321228086948395,
