In [None]:
import os
from concurrent.futures import ThreadPoolExecutor

from PIL import Image

from ocr_wrapper import GoogleOCR, draw_bboxes
from time import perf_counter_ns

def main(imgs, denoise: bool):
    """Run OCR over every image concurrently and return the results in input order.

    Args:
        imgs: Iterable of images; each one is handed to ``GoogleOCR.ocr``.
        denoise: Forwarded unchanged as the ``denoise`` keyword of every OCR call.

    Returns:
        A list with one OCR result per input image, in the order of ``imgs``.

    Raises:
        Whatever an individual OCR call raised; ``Future.result()`` re-raises it.
    """
    client = GoogleOCR(ocr_samples=2)
    pending = []
    with ThreadPoolExecutor(max_workers=64) as pool:
        for image in imgs:
            pending.append(pool.submit(client.ocr, image, denoise=denoise))
    # Leaving the `with` block waits for all submitted tasks, so every future
    # is already finished when its result is read here.
    return [task.result() for task in pending]

In [None]:
# Take the first entry that os.listdir reports for "imgs" and open it with PIL.
# NOTE(review): os.listdir returns entries in arbitrary order, so which file
# ends up being used can differ between machines.
img_names = os.listdir("imgs")[:1]
img_files = []
for name in img_names:
    img_files.append(Image.open(f"imgs/{name}"))

In [None]:
# Time both OCR variants separately. The comparison cell further down reads
# `res1` (denoise=False) as well as `res2` (denoise=True), so both have to be
# produced here for the notebook to survive Restart & Run All.
start = perf_counter_ns()
res1 = main(img_files, denoise=False)
print(f"Time (denoise=False): {(perf_counter_ns() - start) / 1e6} ms")

start = perf_counter_ns()
res2 = main(img_files, denoise=True)
print(f"Time (denoise=True): {(perf_counter_ns() - start) / 1e6} ms")

In [None]:
# Side-by-side comparison on a white 1600x800 canvas: left half shows the
# boxes from `res1`, right half the boxes from `res2`. Both result lists must
# already exist in the kernel (they come from the timing cell).
for page, first_ocr, second_ocr in zip(img_files, res1, res2):
    im = Image.new("RGB", (1600, 800), (255, 255, 255))

    # Each rendering is forced to 800x800, i.e. the aspect ratio is not kept.
    left_panel = draw_bboxes(page, first_ocr)
    left_panel = left_panel.resize((800, 800))
    right_panel = draw_bboxes(page, second_ocr)
    right_panel = right_panel.resize((800, 800))

    im.paste(left_panel, (0, 0))
    im.paste(right_panel, (800, 0))
# Only the canvas built in the last iteration is displayed as the cell output.
im

In [None]:
import numpy as np
import cv2
from cv2 import fastNlMeansDenoising

In [None]:
from collections import defaultdict
from cv2 import fastNlMeansDenoisingColored
from tqdm import tqdm

# Benchmark OpenCV's coloured non-local-means denoiser on the first image for
# several thread counts. `res` maps thread count -> list of wall times in ns.
im = np.array(img_files[0])
im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)  # assumes a 3-channel RGB image — TODO confirm
res = defaultdict(list)

# setNumThreads changes process-wide OpenCV state; remember the current value
# so it can be put back once the benchmark is done (or fails).
prev_num_threads = cv2.getNumThreads()
try:
    # Per the OpenCV docs, 0 threads disables threading (sequential execution);
    # that run is the baseline the speed-up plot divides by.
    for nthread in tqdm(range(0, 13, 2)):
        cv2.setNumThreads(nthread)
        for attempt in range(3):
            start = perf_counter_ns()
            denoised = fastNlMeansDenoisingColored(im, None, h=10, hColor=10, templateWindowSize=7, searchWindowSize=21)
            res[nthread].append(perf_counter_ns() - start)
finally:
    cv2.setNumThreads(prev_num_threads)

In [None]:
import matplotlib.pyplot as plt

# Speed-up per thread count: mean time of the nthread=0 runs divided by the
# mean time measured for each thread count.
baseline = np.mean(res[0])
thread_counts = list(res.keys())
speedups = []
for timings in res.values():
    speedups.append(baseline / np.mean(timings))
plt.plot(thread_counts, speedups)
plt.xlabel("nthread")
plt.ylabel("speedup")

In [None]:
# Show the raw benchmark timings: thread count -> list of durations in nanoseconds.
res