<a href="https://colab.research.google.com/github/aseiple/ocr-testing-results/blob/main/OCRResults.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### General Notes
- Ignore the frame time and FPS for the first image in each set; the OCR engines seem to take a frame to warm up.
- ML Kit doesn't report confidence, so its confidence is always shown as 1.
- PaddleOCR is set to cut off anything below 0.5 confidence; this can be changed if not enough text is being picked up.

#### IPhone/GoPro Image Dataset
- Set of images #s (1-200)
- Screenshots taken from videos on: https://www.youtube.com/@ActionKid

#### Backpack Image Dataset
- Set of images #s (1-12)
- The photos were taken at as close to the same moment in time as possible, with a couple of frames of margin to minimize blur

## Usage
- Click the play button below the settings header
  - This may take a while as the image sets are downloaded
- The available settings should load below the header
- Set options and click submit
- Output images will be loaded
- Subsequent generations will appear at the bottom of the cell

In [None]:
#@title Settings
#Imports
!git clone https://github.com/aseiple/ocr-testing-results

import matplotlib.pyplot as plt
import cv2
import json

%pip install -q ipywidgets
from ipywidgets import interact, interactive, fixed, interact_manual, Box
import ipywidgets as widgets
from IPython.display import clear_output 

# Multiplier for the output figure: submit() scales both the figure size
# and the font size by this value.
plot_scale = widgets.BoundedFloatText(
    value=1,
    min=0.25,
    max=3,
    step=0.25,
    description='Output Scale:',
    # The trait is spelled `disabled`; `disable` is not a widget trait and
    # was only passed through as an unrecognized keyword argument.
    disabled=False,
    style=dict(description_width='initial'),
    continuous_update=False
)

#Image 1
# Controls for the first image slot: enable toggle, camera, OCR model and
# 1-based image number. `continuous_update` is only passed to the numeric
# text box; Checkbox and Dropdown do not define that trait.
img1_checkbox = widgets.Checkbox(
    value=True,
    description='Image 1 Enabled?',
    disabled=False,
    indent=False
)

img1_camera_dropdown = widgets.Dropdown(
    options=['IPhone/GoPro', 'Backpack 2MP Wide', 'Backpack 16MP Wide', 'Backpack AutoFocus'],
    value='Backpack 2MP Wide',
    description='Camera 1:',
    disabled=False
)

img1_model_dropdown = widgets.Dropdown(
    options=['MLKit', 'PaddleOCR'],
    value='PaddleOCR',
    description='Model 1:',
    disabled=False
)

img1_image_num = widgets.BoundedIntText(
    value=1,
    min=1,
    max=200,
    step=1,
    description='Image 1 #:',
    # The trait is spelled `disabled`; `disable` is not a widget trait.
    disabled=False,
    continuous_update=False
)

# Display order of the slot's widgets inside the settings panel.
img1_section = [img1_checkbox, img1_camera_dropdown, img1_model_dropdown, img1_image_num]

#Image 2
# Controls for the second image slot: enable toggle, camera, OCR model and
# 1-based image number. `continuous_update` is only passed to the numeric
# text box; Checkbox and Dropdown do not define that trait.
img2_checkbox = widgets.Checkbox(
    value=True,
    description='Image 2 Enabled?',
    disabled=False,
    indent=False
)

img2_camera_dropdown = widgets.Dropdown(
    options=['IPhone/GoPro', 'Backpack 2MP Wide', 'Backpack 16MP Wide', 'Backpack AutoFocus'],
    value='Backpack 16MP Wide',
    description='Camera 2:',
    disabled=False
)

img2_model_dropdown = widgets.Dropdown(
    options=['MLKit', 'PaddleOCR'],
    value='PaddleOCR',
    description='Model 2:',
    disabled=False
)

img2_image_num = widgets.BoundedIntText(
    value=1,
    min=1,
    max=200,
    step=1,
    description='Image 2 #:',
    # The trait is spelled `disabled`; `disable` is not a widget trait.
    disabled=False,
    continuous_update=False
)

# Display order of the slot's widgets inside the settings panel.
img2_section = [img2_checkbox, img2_camera_dropdown, img2_model_dropdown, img2_image_num]

#Image 3
# Controls for the third image slot: enable toggle, camera, OCR model and
# 1-based image number. `continuous_update` is only passed to the numeric
# text box; Checkbox and Dropdown do not define that trait.
img3_checkbox = widgets.Checkbox(
    value=True,
    description='Image 3 Enabled?',
    disabled=False,
    indent=False
)

img3_camera_dropdown = widgets.Dropdown(
    options=['IPhone/GoPro', 'Backpack 2MP Wide', 'Backpack 16MP Wide', 'Backpack AutoFocus'],
    value='Backpack AutoFocus',
    description='Camera 3:',
    disabled=False
)

img3_model_dropdown = widgets.Dropdown(
    options=['MLKit', 'PaddleOCR'],
    value='PaddleOCR',
    description='Model 3:',
    disabled=False
)

img3_image_num = widgets.BoundedIntText(
    value=1,
    min=1,
    max=200,
    step=1,
    description='Image 3 #:',
    # The trait is spelled `disabled`; `disable` is not a widget trait.
    disabled=False,
    continuous_update=False
)

# Display order of the slot's widgets inside the settings panel.
img3_section = [img3_checkbox, img3_camera_dropdown, img3_model_dropdown, img3_image_num]

# Output widget placed last in the settings panel; cleared right away so it
# starts out empty.
out = widgets.Output()
out.clear_output()

# Button whose click handler (registered at the end of the cell) runs submit().
submit_button = widgets.Button(description='Submit', disabled=False)

def jsonPath(model_name, camera_name):
  """Return the path of the OCR results JSON for a model/camera pair.

  Both arguments are the display strings used by the dropdowns. A KeyError
  is raised when either one is not a known option.
  """
  mlkit_results = {
      'IPhone/GoPro': 'eval_set/mlkit_eval_set_720.json',
      'Backpack 2MP Wide': 'backpack_eval_set/mlkit_cam1_old_wide_images.json',
      'Backpack 16MP Wide': 'backpack_eval_set/mlkit_cam3_new_wide_images.json',
      'Backpack AutoFocus': 'backpack_eval_set/mlkit_cam2_af_images.json',
  }
  paddle_results = {
      'IPhone/GoPro': 'eval_set/paddle_eval_set_720.json',
      'Backpack 2MP Wide': 'backpack_eval_set/paddle_cam1_old_wide_images.json',
      'Backpack 16MP Wide': 'backpack_eval_set/paddle_cam3_new_wide_images.json',
      'Backpack AutoFocus': 'backpack_eval_set/paddle_cam2_af_images.json',
  }
  results_by_model = {'MLKit': mlkit_results, 'PaddleOCR': paddle_results}

  # Model is resolved first, then camera, so an unknown model is reported
  # before an unknown camera.
  relative_path = results_by_model[model_name][camera_name]
  return f"{'./ocr-testing-results/json/'}{relative_path}"

def imagePath(camera_name, num):
  """Return the path of image number `num` for the given camera.

  `num` is zero-padded to three digits to form the PNG file name. A KeyError
  is raised when `camera_name` is not a known option.
  """
  # Built before the folder lookup, as in the original statement order.
  file_name = format(num, '03') + '.png'
  folder_by_camera = {
      'IPhone/GoPro': 'eval_set_720/',
      'Backpack 2MP Wide': 'backpack_eval_set_1080/cam1_old_wide/',
      'Backpack 16MP Wide': 'backpack_eval_set_1080/cam3_new_wide/',
      'Backpack AutoFocus': 'backpack_eval_set_1080/cam2_af/',
  }
  folder = folder_by_camera[camera_name]
  return ''.join(('./ocr-testing-results/', folder, file_name))

def _read_panel_settings():
  """Return (model, camera, image number) for each enabled slot, in slot order."""
  slots = [
      (img1_checkbox, img1_model_dropdown, img1_camera_dropdown, img1_image_num),
      (img2_checkbox, img2_model_dropdown, img2_camera_dropdown, img2_image_num),
      (img3_checkbox, img3_model_dropdown, img3_camera_dropdown, img3_image_num),
  ]
  return [
      (model.get_interact_value(), camera.get_interact_value(), number.get_interact_value())
      for enabled, model, camera, number in slots
      if enabled.get_interact_value()
  ]


def _load_panel(model_name, camera_name, image_number):
  """Load the image, bounding-box outlines and caption for one panel.

  Args:
    model_name: OCR model display name, as accepted by jsonPath().
    camera_name: camera display name, as accepted by jsonPath()/imagePath().
    image_number: 1-based image number taken from the widget.

  Returns:
    dict with keys 'title', 'image' (RGB array), 'outlines' (list of
    (xs, ys) closed polygons) and 'caption' (timing plus per-region text).

  Raises:
    FileNotFoundError: if the image file cannot be read.
  """
  # Context manager so the results file is closed once parsed.
  with open(jsonPath(model_name, camera_name)) as json_file:
    parsed = json.load(json_file)
  # The per-image entries live under the first top-level key of the file.
  entries = parsed[next(iter(parsed))]

  # The widget number is 1-based; the same zero-based index selects both the
  # JSON entry and the image file. Clamp to what the results file actually
  # holds instead of a hard-coded limit, which allowed one index too many
  # for the 12-image backpack sets.
  index = max(0, min(image_number - 1, len(entries) - 1))
  entry = entries[index]

  image_file = imagePath(camera_name, index)
  image = cv2.imread(image_file)
  if image is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError(f'Could not read image: {image_file}')
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

  outlines = []
  for region in entry['regions']:
    corners = [(round(pt['x']), round(pt['y'])) for pt in region['bounding_box']]
    if not corners:
      continue  # nothing to outline for a region without corner points
    corners.append(corners[0])  # repeat the first corner to close the polygon
    xs, ys = zip(*corners)
    outlines.append((xs, ys))

  elapsed_ms = round(entry['time_ms'], 2)
  # Guard against a zero frame time, which would otherwise divide by zero.
  fps = round(1 / (elapsed_ms / 1000), 2) if elapsed_ms else float('inf')
  region_text = ''.join(
      f"{region['text']} ({round(region['confidence'], 4)})\n"
      for region in entry['regions']
  )

  return {
      'title': f'{model_name} - {camera_name}',
      'image': image,
      'outlines': outlines,
      'caption': f'{elapsed_ms} ms ({fps}fps)\n\n{region_text}',
  }


def submit(_):
  """Button callback: render every enabled image with its OCR results.

  Each enabled slot gets one column: the image with the detected regions
  outlined on the top row, and the timing plus per-region text and
  confidence on the bottom row. Columns are assigned in slot order to
  whichever slots are enabled, so any combination of checkboxes works.

  Args:
    _: the clicked button (unused).
  """
  out.clear_output()

  settings = _read_panel_settings()
  if not settings:
    # A grid with zero columns is invalid, so stop before building a figure.
    print('No images enabled - enable at least one image and submit again.')
    return

  print('LOADING PLEASE WAIT...')

  # Load everything before creating the figure so a loading error does not
  # leave an empty figure behind.
  panels = [_load_panel(*setting) for setting in settings]

  scale = plot_scale.get_interact_value()
  fig = plt.figure(figsize=(16 * scale, 9 * scale))
  plt.rcParams.update({'font.size': 15 * scale})

  columns = len(panels)
  for column, panel in enumerate(panels, start=1):
    # Top row: the image with one outline per detected region.
    image_ax = fig.add_subplot(2, columns, column)
    image_ax.imshow(panel['image'])
    image_ax.axis('off')
    image_ax.set_title(panel['title'])
    for xs, ys in panel['outlines']:
      image_ax.plot(xs, ys)

    # Bottom row, same column: the caption text.
    text_ax = fig.add_subplot(2, columns, columns + column)
    text_ax.text(0.5, 1, panel['caption'], ha='center', va='top')
    text_ax.axis('off')

  fig.tight_layout()

# Clear what the cell has printed so far before showing the controls.
clear_output()

# Run submit() whenever the button is clicked.
submit_button.on_click(submit)

# Last expression of the cell, so the notebook displays the panel: scale
# box, the three image sections, the submit button and the output area.
widgets.VBox(
    children=[plot_scale] + img1_section + img2_section + img3_section + [submit_button, out]
)