### Perform modifications

In [11]:
import json
import os

folder_path  = r'C:\Users\Habram\Documents\Datasets\SER_annotated_manual_v2.1'

# Suffix that identifies the hand-annotated files; it is stripped to name the output file
hand_suffix = '_hand.json'


def _mark_block_continuations(entries):
    """Prefix the label of every entry that continues a block with 'i' (in place).

    A block starts at an entry labelled 'b' + <type> (with <type> != 'Other') and
    continues over the directly following entries labelled exactly <type>.
    Any other label ends the block.
    """
    label_type = None
    for entry in entries:
        label = entry['label']
        if label.startswith('b') and label[1:] != 'Other':
            # Beginning of a new block: remember its label type
            label_type = label[1:]
        elif label == label_type:
            entry['label'] = 'i' + label
        else:
            label_type = None


def _merge_blocks(entries):
    """Return a new list with one entry per block; entries labelled 'd' are dropped.

    'b'-prefixed entries open a block (the prefix is removed from the label),
    'i'-prefixed entries are folded into the most recent kept entry, and every
    other entry is kept unchanged.
    """
    # NOTE(review): entries are classified purely by the first character of the
    # label, so a label type that itself starts with 'b' or 'i' would be misread
    # here - confirm the label set avoids this.
    merged = []
    for entry in entries:
        label = entry['label']
        if label.startswith('b'):
            # Remove the 'b' from the beginning
            entry['label'] = label[1:]
            merged.append(entry)
        elif label == 'd':
            # 'd' means delete: do not include it in the new data
            continue
        elif label.startswith('i'):
            # Continuation: extend the text and the word list of the open block
            merged[-1]['text'] += ' ' + entry['text']
            merged[-1]['words'].append(entry['words'][0])
        else:
            merged.append(entry)
    return merged


def _refit_block_boxes(entries):
    """Set the box of every multi-word entry to the union of its word boxes (in place)."""
    for entry in entries:
        words = entry['words']
        # One-word entries keep the box they already have
        if len(words) > 1:
            entry['box'] = [
                min(word['box'][0] for word in words),
                min(word['box'][1] for word in words),
                max(word['box'][2] for word in words),
                max(word['box'][3] for word in words),
            ]


# Get a list of all files in the folder
all_files = os.listdir(folder_path)

# Filter the files with the filename ending of '_hand.json'
hand_files = [file for file in all_files if file.endswith(hand_suffix)]

# Make sure the output folder exists before anything is written into it
annotations_dir = os.path.join(folder_path, 'annotations')
os.makedirs(annotations_dir, exist_ok=True)

for file_name in hand_files:
    file_path = os.path.join(folder_path, file_name)  # Create the full path to the file

    # Read the JSON file; the handle is closed as soon as it has been parsed
    with open(file_path) as f:
        data = json.load(f)

    _mark_block_continuations(data)
    new_data = _merge_blocks(data)
    _refit_block_boxes(new_data)

    # 'X_hand.json' is written out as 'annotations/X.json'
    output_name = file_name[:-len(hand_suffix)] + '.json'
    with open(os.path.join(annotations_dir, output_name), 'w') as f:
        json.dump(new_data, f)

### Inspect the modifications

In [15]:
from PIL import Image, ImageDraw, ImageFont

# `folder_path`, `os` and `json` come from the first cell.
# NOTE: the last cell reassigns `folder_path`, so re-run the first cell before this one.
annotations_dir = os.path.join(folder_path, 'annotations')
images_dir = os.path.join(folder_path, 'images')
# Folder that receives the rendered previews
preview_dir = r'C:\Users\Habram\Pictures\Camera Roll'

# Get a list of all files in the folder
all_files = os.listdir(annotations_dir)

# The default font does not depend on the file, so load it once
font = ImageFont.load_default()

for file in all_files:
    # Only annotation files can be rendered
    if not file.endswith('.json'):
        continue

    # The scan shares the annotation's base name; the preview is saved under that
    # same name so that it always matches the image that was actually drawn on
    image = file[0:-5] + '.tif'

    with Image.open(os.path.join(images_dir, image)) as source:
        invoice = source.convert('RGB')
    draw = ImageDraw.Draw(invoice)

    with open(os.path.join(annotations_dir, file)) as f:
        data = json.load(f)

    for entry in data:
        # Individual words in grey, underneath the block outline
        for word in entry['words']:
            draw.rectangle(word['box'], outline='grey')

        # 'Other' blocks are drawn in green, every labelled block in red
        box = entry['box']
        color = 'green' if entry['label'] == 'Other' else 'red'
        draw.rectangle(box, outline=color)
        draw.text((box[0] + 10, box[1] - 10), text=entry['label'], fill=color, font=font)

    # Saving stays best-effort, but a failure is reported instead of being hidden
    try:
        invoice.save(os.path.join(preview_dir, image))
    except Exception as exc:
        print('Could not save preview for', file, '-', exc)



### Unnormalize the bounding boxes

In [9]:
from PIL import Image, ImageDraw, ImageFont
import json
import os

def unnormalize_box(bbox, width, height):
    """Scale a box given in thousandths of the image size to integer coordinates.

    bbox   -- subscriptable with at least four numbers; indices 0 and 2 are
              scaled by `width`, indices 1 and 3 by `height`
    width  -- horizontal scale factor
    height -- vertical scale factor

    Returns a list of four ints; fractional results are truncated by int().
    """
    # Scale factor to apply to each of the four coordinates, in order
    extents = (width, height, width, height)
    return [int(extent * (bbox[position] / 1000)) for position, extent in enumerate(extents)]

folder_path  = r'C:\Users\Habram\Desktop\old'
# Get a list of all files in the folder
all_files = os.listdir(folder_path)
# Filter the files with the filename ending of '_hand.json'
hand_files = [file for file in all_files if file.endswith('_hand.json')]
image_files = [file for file in all_files if file.endswith('.tif')]

# NOTE: every annotation file is overwritten in place, so running this cell a
# second time scales the already unnormalized boxes again.
for file in hand_files:
    # Pair each annotation with its own scan by name rather than by listing order,
    # so a missing or extra file can never cause the wrong image size to be used.
    # Assumes 'X_hand.json' belongs to 'X.tif' - TODO confirm for this folder.
    image = file[0:-10] + '.tif'
    if image not in image_files:
        print('No image found for', file, '- file left unchanged')
        continue

    # Only the image dimensions are needed
    with Image.open(os.path.join(folder_path, image)) as invoice:
        width, height = invoice.size

    annotation_path = os.path.join(folder_path, file)
    with open(annotation_path) as f:
        data = json.load(f)

    # Unnormalize the block boxes and the boxes of the words inside each block
    for entry in data:
        entry['box'] = unnormalize_box(entry['box'], width, height)
        for word in entry['words']:
            word['box'] = unnormalize_box(word['box'], width, height)

    with open(annotation_path, 'w') as f:
        json.dump(data, f)