In [10]:
from transformers import LayoutLMv2ImageProcessor

# Image processor created with OCR switched on (apply_ocr=True); later cells
# read the 'pixel_values', 'words' and 'boxes' entries of its output.
image_processor = LayoutLMv2ImageProcessor(apply_ocr=True)

In [30]:
from PIL import Image

# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because the other cells build their paths from `dir + file`.
dir = r'C:\Users\Habram\Documents\Datasets\SER_annotated_manual'
''' !!!Define the file HERE!!!'''
file = r'\50Hertz_1'

# Open the scanned page and convert it to RGB in one step.
invoice = Image.open(dir + file + '.tif').convert('RGB')

# Run the image processor on the page and pull out the pieces used below:
# the pixel tensor plus the words and boxes of the first (only) image.
img_features = image_processor(invoice, return_tensors='pt')
image = img_features['pixel_values']
words, boxes = img_features['words'][0], img_features['boxes'][0]

In [43]:
from PIL import ImageDraw, ImageFont

# Page size in pixels, used to scale the boxes back to image coordinates.
width, height = invoice.size
# Pillow's built-in default font for the text labels.
font = ImageFont.load_default()
# Drawing handle that paints directly onto `invoice`.
draw = ImageDraw.Draw(invoice)

def unnormalize_box(bbox, width, height):
    """Scale a box given in thousandths of the page size to integer pixels.

    Args:
        bbox: indexable with four entries; entries 0 and 2 are scaled by
            ``width``, entries 1 and 3 by ``height``.
        width: page width in pixels.
        height: page height in pixels.

    Returns:
        A list of four ints, each truncated towards zero by ``int()``.
    """
    # Entries alternate between the horizontal and the vertical extent.
    extents = (width, height, width, height)
    return [int(extent * (bbox[i] / 1000)) for i, extent in enumerate(extents)]

# Overlay every OCR word on the page: a red rectangle plus the recognised text.
for box, word in zip(boxes, words):
    # Scale the OCR box (thousandths of the page size) to pixel coordinates.
    pixel_box = unnormalize_box(box, width, height)
    # The label is offset slightly to the right of and above the box corner.
    label_pos = (pixel_box[0] + 10, pixel_box[1] - 10)
    draw.rectangle(pixel_box, outline='red')
    try:
        draw.text(label_pos, text=word, fill='red', font=font)
    except Exception:
        # Best-effort fallback: if the word cannot be drawn (presumably a
        # character the default font cannot render - TODO confirm), write a
        # placeholder instead. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit still propagate.
        draw.text(label_pos, text='Something', fill='red', font=font)

invoice.show()

In [14]:
# One annotation entry per OCR word. Every entry starts with the label "Other"
# and holds a single word; the box is stored as delivered by the processor
# (not converted to pixels), both on the entry and on its word.
annotation = [
    {
        "text": word,
        "label": "Other",
        "box": box,
        "words": [{
            "box": box,
            "text": word
        }]
    }
    for box, word in zip(boxes, words)
]

In [15]:
import json

# Untouched OCR output; regenerated on every run.
with open(dir + file + '_raw.json', 'w') as f:
    json.dump(annotation, f)

# Working copy for manual labelling. Mode 'x' refuses to overwrite an existing
# file, so re-running this cell cannot wipe labels that were already edited by
# hand. Delete the file manually to start over from the raw OCR output.
try:
    with open(dir + file + '_hand.json', 'x') as f:
        json.dump(annotation, f)
except FileExistsError:
    print(dir + file + '_hand.json already exists, keeping it')

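# Manual annotation step: edit the labels in the file ending in '_hand.json'
# before running the following cells.
# Conventions read by the merge cell below: a label prefixed with 'b' starts a
# new block, and the label 'd' marks an entry to be dropped.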

In [25]:
import json

# Load the hand-edited annotations. The context manager closes the file handle
# once parsing is done. The downstream cells iterate over `data` as a list of
# annotation entries.
with open(dir + file + '_hand.json') as f:
    data = json.load(f)

In [26]:
# Merge the hand-labelled entries into blocks.
#   'b<label>' : starts a new block with label <label>
#   'd'        : entry is dropped
#   '<label>'  : appended to the previous block if that block has the same
#                label, otherwise kept as a block of its own
# NOTE(review): a plain label that itself begins with 'b' is treated as a block
# start and loses its first character - confirm the label set avoids this.
new_data = []

for entry in data:
    label = entry['label']

    # 'd' means delete: do not carry the entry over.
    if label == 'd':
        continue

    # Strip the 'b' marker from an explicit block start.
    starts_block = label.startswith('b')
    if starts_block:
        label = label[1:]

    # Continue the previous block when the label matches. The `new_data` check
    # keeps the very first kept entry from indexing into an empty list.
    if not starts_block and new_data and label == new_data[-1]['label']:
        new_data[-1]['text'] += ' ' + entry['text']
        new_data[-1]['words'].append(entry['words'][0])
        continue

    # Append a copy (with its own 'words' list) so that `data` stays untouched
    # and re-running this cell gives the same result.
    new_data.append(dict(entry, label=label, words=list(entry['words'])))

# Fix the block bounding boxes: a block with several words gets the smallest
# box that encloses all of its word boxes. Single-word blocks keep their box.
for entry in new_data:
    word_boxes = [word['box'] for word in entry['words']]
    if len(word_boxes) > 1:
        # min/max over the word boxes instead of fixed start values, so the
        # result is correct for any coordinate range.
        entry['box'] = [
            min(b[0] for b in word_boxes),
            min(b[1] for b in word_boxes),
            max(b[2] for b in word_boxes),
            max(b[3] for b in word_boxes),
        ]

In [28]:
import json
# Persist the merged blocks next to the source image.
done_path = dir + file + '_done.json'
with open(done_path, 'w') as done_file:
    json.dump(new_data, done_file)

In [27]:
from PIL import ImageDraw, ImageFont, Image

# Reopen the page so the overlay is drawn on a fresh RGB copy of the image.
invoice = Image.open(dir + file + '.tif').convert('RGB')
width, height = invoice.size

# Default Pillow font for the labels and a drawing handle bound to `invoice`.
font = ImageFont.load_default()
draw = ImageDraw.Draw(invoice)

def unnormalize_box(bbox, width, height):
    """Scale a box given in thousandths of the page size to pixel coordinates.

    This rebinds the ``unnormalize_box`` name defined in an earlier cell; unlike
    that version, the values are returned without ``int()`` truncation.

    Args:
        bbox: indexable with four entries; entries 0 and 2 are scaled by
            ``width``, entries 1 and 3 by ``height``.
        width: page width in pixels.
        height: page height in pixels.

    Returns:
        A list of the four scaled values (floats, from the true division).
    """
    left = width * (bbox[0] / 1000)
    top = height * (bbox[1] / 1000)
    right = width * (bbox[2] / 1000)
    bottom = height * (bbox[3] / 1000)
    return [left, top, right, bottom]

# Draw the merged result: every word box in grey, then the block box and its
# label on top, green for 'Other' blocks and red for everything else.
for entry in new_data:
    for word in entry['words']:
        draw.rectangle(unnormalize_box(word['box'], width, height), outline='grey')

    block_box = unnormalize_box(entry['box'], width, height)
    colour = 'green' if entry['label'] == 'Other' else 'red'
    draw.rectangle(block_box, outline=colour)
    # The label is offset slightly to the right of and above the box corner.
    draw.text((block_box[0] + 10, block_box[1] - 10), text=entry['label'], fill=colour, font=font)

invoice.show()

# Generate dataset from files

In [46]:
import os

filepath = r'C:\Users\Habram\Documents\Datasets\SER_annotated_manual'
ann_dir = os.path.join(filepath, "annotations")
img_dir = os.path.join(filepath, "images")

# Rewrite every annotation file so that its boxes are in pixel coordinates of
# the matching image.
# NOTE: the files are overwritten in place, so this cell is NOT idempotent:
# running it a second time scales the already-converted boxes again.
for ann_name in sorted(os.listdir(ann_dir)):
    # Only JSON annotation files are processed; anything else is skipped.
    stem, ext = os.path.splitext(ann_name)
    if ext.lower() != ".json":
        continue

    file_path = os.path.join(ann_dir, ann_name)
    with open(file_path, "r", encoding="utf8") as f:
        data = json.load(f)

    # Swap only the file extension. A plain str.replace("json", "tif") on the
    # full path would also rewrite any "json" in a directory or file name.
    image_path = os.path.join(img_dir, stem + ".tif")
    # Only the page size is needed, so the image is opened and closed again
    # without converting it.
    with Image.open(image_path) as page_image:
        width, height = page_image.size

    print(width, height)

    # int() makes the stored boxes integers regardless of which of the two
    # unnormalize_box definitions in this notebook was executed last.
    for block in data:
        block['box'] = [int(v) for v in unnormalize_box(block['box'], width, height)]
        for word in block['words']:
            word['box'] = [int(v) for v in unnormalize_box(word['box'], width, height)]

    with open(file_path, 'w', encoding="utf8") as f:
        json.dump(data, f)

1654 2339
2480 3508
2479 3508
2501 3533
2494 3533
2480 3508
1654 2339
2495 3539
2486 3534
1653 2339


In [37]:
# Sanity check: show the box of the first entry only (nothing if `data` is empty).
for entry in data[:1]:
    print(entry['box'])

[119, 170, 248, 175]
