<a href="https://colab.research.google.com/github/jmau2002/EMDocs/blob/master/easyocr_batched_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# easyocr test
!pip install git+git://github.com/jaidedai/easyocr.git

Collecting git+git://github.com/jaidedai/easyocr.git
  Cloning git://github.com/jaidedai/easyocr.git to /tmp/pip-req-build-p4ed2env
  Running command git clone --filter=blob:none --quiet git://github.com/jaidedai/easyocr.git /tmp/pip-req-build-p4ed2env


# New Section

In [None]:
# Print the NVIDIA driver / GPU status of this runtime, so the timings reported
# further down can be read against the hardware they were measured on.
!nvidia-smi

Mon Sep 20 14:07:44 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.63.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P8    27W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from __future__ import print_function

import easyocr
import numpy as np
import time
import cv2
import sys
import os

if sys.version_info[0] == 2:
    from six.moves.urllib.request import urlretrieve
else:
    from urllib.request import urlretrieve


def _load_resized_image(url, size):
    """Download ``url`` and return the decoded image resized to ``size``.

    Args:
        url: Location of the image to fetch with ``urlretrieve``.
        size: ``(width, height)`` tuple passed to ``cv2.resize``.

    Returns:
        The resized image array as produced by ``cv2.resize``.

    Raises:
        IOError: If ``cv2.imread`` cannot decode the downloaded file.
    """
    tmp_path, _ = urlretrieve(url)
    try:
        image = cv2.imread(tmp_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError(f"Could not decode image downloaded from {url}")
        return cv2.resize(image, size)
    finally:
        # Always delete the temporary download, even when decoding/resizing fails.
        os.remove(tmp_path)


def test_single_and_batched_text_detection_and_prediction(img_repeat=5, num_loop=1):
    """Check ``readtext`` / ``readtext_batched`` results and compare their speed.

    The first half runs both APIs on well-known logo images and asserts the
    recognised text. The second half times sequential ``readtext`` calls against
    a single ``readtext_batched`` call on the same stack of images and prints the
    average time per image for each.

    Args:
        img_repeat: How many times each of the three test images is repeated to
            build the benchmark batch (batch size is ``3 * img_repeat``).
        num_loop: Number of timed passes over the benchmark batch.

    Raises:
        ValueError: If ``img_repeat`` or ``num_loop`` is smaller than 1.
        AssertionError: If a recognition result differs from the expected text.
    """
    if img_repeat < 1 or num_loop < 1:
        raise ValueError("img_repeat and num_loop must both be >= 1")

    img_path = [
        "https://pytorch.org/tutorials/_static/img/thumbnails/cropped/profiler.png",
        "https://www.tensorflow.org/images/tf_logo_social.png",
        "https://storage.googleapis.com/gd-wagtail-prod-assets/original_images/evolving_google_identity_2x1.jpg"]
    # Common size every image is resized to for batched inference.
    width, height = 800, 600

    reader = easyocr.Reader(['en'])
    # test with easy logos to ensure same results
    # test for single image with old api
    result = reader.readtext(img_path[0])
    # NOTE: compare with ==; `assert x, y` would only use y as the failure message.
    assert len(result) == 1, result
    assert result[0][1] == 'PyTorch', result
    print(result)
    print("Single image test with readtext successful")

    # test for single image with new api
    result = reader.readtext_batched(img_path[0])
    assert len(result) == 1, result
    assert result[0][0][1] == 'PyTorch', result
    print(result)
    print("Single image test with readtext_batched successful")

    # test for a list of images in batch
    #
    # all images in image list must be of the same size for batched inference
    #     for eg, result = reader.readtext_batched(img_path) will fail here
    #     so either resize all images to the same size before passing to readtext_batched
    #     or call the func like so reader.readtext_batched(img_path, n_width=800, n_height=600)
    #
    # warning, for better results, it is recommended to maintain aspect while resizing
    result = reader.readtext_batched(img_path, n_width=width, n_height=height)
    assert len(result) == 3, result
    assert result[0][0][1] == 'PyTorch', result
    assert result[1][0][1] == 'TensorFlow', result
    assert result[2][0][1] == 'Google', result
    print(result)
    print("Batched image test with readtext_batched successful")

    ############################################################################
    # inference time test between sequential and batch processing
    # batch processing will be faster when using GPU
    ############################################################################
    # pre-download, load and resize images for inference time test
    cv2_images = np.array(
        [_load_resized_image(url, (width, height)) for url in img_path])

    # np.repeat along axis 0 gives a batch of len(img_path) * img_repeat images,
    # i.e. an array of shape (batch, height, width, 3)
    cv2_images_repeat1 = np.repeat(cv2_images, repeats=img_repeat, axis=0)
    cv2_images_repeat2 = cv2_images_repeat1.copy()
    num_images = cv2_images_repeat1.shape[0]
    print(
        f"Running inference speed test with an image array of shape {cv2_images_repeat1.shape} for {num_loop} iterations")

    # sequential processing
    # run sequential processing test: one readtext call per image
    reader = easyocr.Reader(['en'])
    start = time.perf_counter()
    for _ in range(num_loop):
        for img in cv2_images_repeat1:
            reader.readtext(img)
    print(
        "Single/Sequential image inference time per image: " +
        f"{(time.perf_counter()-start)/(num_loop*num_images):.3f}s")

    # batched processing
    reader = easyocr.Reader(['en'], cudnn_benchmark=True)

    # warmup for batched inference on GPU, using same batch size for all subsequent inference
    # cudnn benchmark should be set to True
    # see this issue https://discuss.pytorch.org/t/model-inference-very-slow-when-batch-size-changes-for-the-first-time/44911
    dummy = np.zeros([num_images, height, width, 3], dtype=np.uint8)
    reader.readtext_batched(dummy)

    # run batch processing test
    start = time.perf_counter()
    for _ in range(num_loop):
        reader.readtext_batched(cv2_images_repeat2)
    print(
        "Batched image inference time per image: " +
        f"{(time.perf_counter()-start)/(num_loop*num_images):.3f}s")


In [None]:
# Run the checks and the speed comparison.
# NOTE(review): the function is invoked twice back to back; presumably the second
# call is meant to show results once the models are already downloaded/cached —
# confirm, or drop the duplicate call if it is accidental.
test_single_and_batched_text_detection_and_prediction()
test_single_and_batched_text_detection_and_prediction()

[([[158, 227], [546, 227], [546, 362], [158, 362]], 'PyTorch', 0.8182700892599657)]
Single image test with readtext successful
[[([[158, 227], [546, 227], [546, 362], [158, 362]], 'PyTorch', 0.8182700892599657)]]
Single image test with readtext_batched successful
[[([[218, 226], [719, 226], [719, 362], [218, 362]], 'PyTorch', 0.6865308673868696)], [([[155, 365], [643, 365], [643, 481], [155, 481]], 'TensorFlow', 0.9983146702913579)], [([[173, 190], [624, 190], [624, 418], [173, 418]], 'Google', 0.9934508520695108)]]
Batched image test with readtext_batched successful
Running inference speed test with an image array of shape (15, 600, 800, 3) for 1 iterations
Single/Sequential image inference time per image: 0.304s
Batched image inference time per image: 0.163s
[([[158, 227], [546, 227], [546, 362], [158, 362]], 'PyTorch', 0.8182700892599657)]
Single image test with readtext successful
[[([[158, 227], [546, 227], [546, 362], [158, 362]], 'PyTorch', 0.8182700892599657)]]
Single image tes