In [None]:
import os
import json
from PIL import Image
from concurrent.futures import ThreadPoolExecutor
from ocr_wrapper import GoogleOCR, draw_bboxes, BBox

In [None]:
# Recursively gather the path of every file found below the "imgs" directory.
items = os.walk("imgs")
all_files = [
    os.path.join(folder, name)
    for folder, _subdirs, names in items
    for name in names
]

In [None]:
# Module-level OCR client; `process_image` reads this global rather than taking it as an argument.
# NOTE(review): the same instance is later called from ThreadPoolExecutor worker threads —
# confirm that GoogleOCR is safe to share across threads.
ocr = GoogleOCR() # Create client

In [None]:
def process_image(img_path):
    """Run OCR on one image file and return the OCR result.

    Args:
        img_path: Path of the image file to open with PIL.

    Returns:
        Whatever ``ocr.ocr(img, denoise=False)`` returns for the opened image.

    Exceptions from ``Image.open`` or ``ocr.ocr`` are not caught here; they
    propagate to the caller.
    """
    # The context manager closes the underlying file handle as soon as OCR is
    # finished, so a large batch (or many worker threads) does not accumulate
    # open file descriptors.
    with Image.open(img_path) as img:
        print("Processing image: ", img_path)
        return ocr.ocr(img, denoise=False)

In [None]:
# Smoke test: OCR only the first collected image (the original loop stopped
# unconditionally after one iteration) — presumably a sanity check before the
# full parallel run.
r = {}
for img_path in all_files[:1]:
    try:
        r[img_path] = process_image(img_path)
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that a kernel
        # interrupt (KeyboardInterrupt) still stops the cell, and report the
        # cause instead of discarding it.
        print("Error processing image: ", img_path, repr(exc))

In [None]:
# OCR every collected image in parallel; `r` maps image path -> OCR result.
r = {}
with ThreadPoolExecutor(max_workers=12) as executor:
    # Use submit() instead of map(): map() yields plain results and re-raises a
    # worker's exception while the iterator is being advanced, i.e. outside any
    # try block wrapped around the loop body. With real Future objects the
    # exception surfaces at .result(), where it can be handled per image so one
    # bad file does not abort the whole batch.
    futures = [(img_path, executor.submit(process_image, img_path)) for img_path in all_files]
    for img_path, future in futures:
        try:
            r[img_path] = future.result()
        except Exception as exc:
            print("Error processing image: ", img_path, repr(exc))

In [None]:
class BBoxEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``BBox`` instances as their attribute dict."""

    def default(self, obj):
        """Return the attribute dict of a ``BBox``; defer to the base encoder for anything else."""
        if not isinstance(obj, BBox):
            return super().default(obj)
        return vars(obj)

# Write the collected OCR results to disk; BBoxEncoder converts the BBox
# values that the stock JSON encoder cannot serialise.
with open("passport_bboxes_no_denoise.json", "w") as out_file:
    json.dump(r, out_file, cls=BBoxEncoder, indent=4)

In [None]:
# Load both OCR result files. "passport_bboxes_original.json" is not written
# anywhere in the cells shown here, so it must already exist on disk.
with open("passport_bboxes_original.json", "r") as original_file:
    original = json.load(original_file)

with open("passport_bboxes_no_denoise.json", "r") as no_denoise_file:
    no_denoise = json.load(no_denoise_file)

# Only entries present in both runs can be compared (key views intersect to a set).
all_keys = original.keys() & no_denoise.keys()

In [None]:
from dataclasses import fields
from tqdm import tqdm

def draw(im, bbox_dicts: list[dict]):
    """Rebuild ``BBox`` objects from plain dicts and draw them onto an image.

    Args:
        im: Image handed to ``draw_bboxes`` as ``img``.
        bbox_dicts: One dict per bounding box (e.g. as loaded from JSON). Keys
            that are not dataclass fields of ``BBox`` are ignored.

    Returns:
        The value returned by ``draw_bboxes``.
    """
    # Compute the accepted field names once; the original rebuilt this list for
    # every key of every dict.
    field_names = {f.name for f in fields(BBox)}
    bboxes = [
        BBox(**{key: value for key, value in bbox.items() if key in field_names})
        for bbox in bbox_dicts
    ]
    return draw_bboxes(
        img=im,
        bboxes=bboxes,
        texts=[str(b.text) for b in bboxes],
        strokewidths=2,  # Could also be a list for each bbox
        fontsize=12,  # Could also be a list for each bbox
        colors="purple",  # A single string applies to all bboxes
    )

# For every image present in both OCR runs, save a side-by-side comparison
# (original run on the left, no-denoise run on the right) under "imgs_comparison".
for k in tqdm(all_keys):
    try:
        # count=1 rewrites only the first "imgs" in the path (the root folder
        # when keys start with "imgs", as produced by os.walk("imgs")); later
        # occurrences inside sub-folder or file names stay untouched.
        new_filename = k.replace("imgs", "imgs_comparison", 1)
        if os.path.exists(new_filename):
            continue  # already rendered by an earlier run

        # Keep the source image open until the composite is saved, then close
        # it so file handles do not pile up across the loop.
        with Image.open(k) as im:
            # NOTE(review): assumes draw_bboxes returns a new image and does not
            # draw on `im` in place; otherwise the right panel would also show
            # the left panel's boxes — confirm.
            ann_original = draw(im, original[k])
            ann_no_denoise = draw(im, no_denoise[k])
            complete = Image.new("RGB", (ann_original.width + ann_no_denoise.width, ann_original.height))
            complete.paste(ann_original, (0, 0))
            complete.paste(ann_no_denoise, (ann_original.width, 0))
            os.makedirs(os.path.dirname(new_filename), exist_ok=True)
            complete.save(new_filename)
    except Exception as e:
        # Best effort: report the failure with its cause and move on to the next image.
        print("Error processing image: ", k, repr(e))