<a href="https://colab.research.google.com/github/fabiodr/Video_OCR/blob/main/Video_OCR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# OCR on Video

In [None]:
# Install Necessary Packages
!pip install roboflow inference supervision easyocr -q

In [None]:
# Helper class for tracking OCR consensus
class Consensus:
  """Tally repeated readings per item and report the most frequent one.

  Each item name maps to a dict of ``candidate value -> times seen``. A
  candidate "wins" once its count is strictly greater than ``threshold``.
  """

  def __init__(self, threshold=10):
    # item_name -> {candidate value -> number of times it was added}
    self.items = {}
    self.threshold = threshold

  def add_candidate(self, item_name, item_value):
    """Record one more sighting of item_value for item_name.

    Returns the (live) tally dict for item_name.
    """
    tallies = self.items.setdefault(item_name, {})
    tallies[item_value] = tallies.get(item_value, 0) + 1
    return tallies

  def get_candidates(self, item_name):
    """Return the tally dict for item_name; raises KeyError if it is unknown."""
    return self.items[item_name]

  def get_top_candidate(self, item_name):
    """Return (value, count, confidence_percent) for the leading candidate.

    Returns None when item_name has no recorded candidates. Confidence is
    ((count - 1) / threshold) * 100, so it reaches 100 exactly when the
    count first exceeds the threshold.
    """
    tallies = self.items.get(item_name)
    if not tallies:
      return None

    # max() keeps the first-inserted candidate on ties (dict order).
    top_value = max(tallies, key=tallies.get)
    top_score = tallies[top_value]
    top_confidence = ((top_score - 1) / self.threshold) * 100
    return (top_value, top_score, top_confidence)

  def has_exceeded_threshold(self, item_name):
    """Return True if any candidate of item_name was seen more than threshold times.

    Unknown items return False instead of raising, matching how winner()
    and get_top_candidate() treat them.
    """
    candidates = self.items.get(item_name, {})
    return any(count > self.threshold for count in candidates.values())

  def winner(self, item_name):
    """Return the leading value once it has exceeded the threshold, else None."""
    if self.has_exceeded_threshold(item_name):
      return self.get_top_candidate(item_name)[0]
    return None

  def top_candidates(self):
    """Return {item_name: get_top_candidate(item_name)} for every known item."""
    return {name: self.get_top_candidate(name) for name in self.items}

In [None]:
# Model: https://universe.roboflow.com/roboflow-universe-projects/yard-management-system

In [None]:
import supervision as sv
from inference_sdk import InferenceHTTPClient
from google.colab import userdata

# HTTP client for the hosted inference endpoint; the API key is looked up
# through google.colab's userdata under the name ROBOFLOW_API_KEY.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=userdata.get('ROBOFLOW_API_KEY')
)

def predict(image, model_id="yard-management-system/4"):
  """Run remote inference on image and return the result as sv.Detections.

  Args:
    image: Input passed straight to CLIENT.infer (the video callback in this
      file passes a single video frame).
    model_id: Model identifier sent to the inference endpoint. Defaults to
      the model this notebook was written for.

  Returns:
    The inference response converted with sv.Detections.from_inference.
  """
  result = CLIENT.infer(image, model_id=model_id)
  return sv.Detections.from_inference(result)

In [None]:
# Input video file that this notebook processes.
VIDEO_PATH = "container.mp4"

In [None]:
# Helper Functions
def check_within(box1, box2, min_fraction=0.5):
    """Return True if at least min_fraction of box1's area lies inside box2.

    Boxes are indexable as (x1, y1, x2, y2). The overlap is measured as a
    fraction of box1's own area rather than as an absolute area, so the
    result does not depend on the pixel scale of the boxes.

    Args:
        box1: The inner box being tested.
        box2: The outer box that may contain box1.
        min_fraction: Minimum share of box1's area that must overlap box2.

    Returns:
        bool: True when the overlap fraction is >= min_fraction. A box1 with
        zero or negative area is never considered within anything.
    """
    x_overlap = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    y_overlap = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    overlap_area = x_overlap * y_overlap

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    if box1_area <= 0:
        # Degenerate box: avoid dividing by zero.
        return False

    return bool(overlap_area / box1_area >= min_fraction)


import easyocr
import re
# NOTE(review): ocr_cache is not referenced anywhere in the visible code.
ocr_cache = {}
# EasyOCR reader created once for English ('en') and reused by ocr().
reader = easyocr.Reader(['en'])

# Tallies OCR readings per ID passed to ocr(); a reading wins once it has
# been seen more than 5 times.
ocr_tracker = Consensus(5)
def ocr(frame,box,id):
  """Read the digits inside box of frame and record them for id.

  If ocr_tracker already has a winning reading for id, that reading is
  returned and no OCR is run. Otherwise the box is cropped from the frame,
  read with EasyOCR, stripped down to digits 0-9, and (when non-empty)
  added to ocr_tracker as a candidate for id.

  Args:
    frame: Image indexed as frame[y1:y2, x1:x2].
    box: (x1, y1, x2, y2) coordinates; each is truncated with int().
    id: Key under which readings are tallied in ocr_tracker.

  Returns:
    The winning reading if one exists, otherwise the digits read from this
    crop (an empty string if none were found or OCR failed).
  """
  winner = ocr_tracker.winner(id)
  if winner:
    return winner

  x1, y1, x2, y2 = box
  cropped_frame = frame[int(y1):int(y2), int(x1):int(x2)]

  try:
    result = reader.readtext(cropped_frame, detail=0)
  except Exception:
    # Best-effort: a failed read counts as "no text". Unlike a bare except,
    # this lets KeyboardInterrupt/SystemExit propagate.
    print("OCR failed on image")
    result = []

  # Join all text fragments and keep only the digits.
  text = re.sub(r'[^0-9]', '', "".join(result))

  if text:
    ocr_tracker.add_candidate(id, text)

  return text






In [None]:
import supervision as sv
import cv2
import numpy as np

# Initialize ByteTrack; video_callback feeds each frame's detections through it
tracker = sv.ByteTrack()

# Consensus monitoring utility: tallies OCR readings per container
# (default threshold of 10)
container_ids_tracker = Consensus()

# Keeps track of the IDs of detected containers
# NOTE(review): container_ids is not referenced anywhere in the visible code.
container_ids = {}

# A callback function runs for each video frame
def video_callback(frame, i):
  """Detect, track, OCR and annotate a single video frame.

  Runs predict() on the frame, updates the tracker, and for every class-2
  detection that lies within a class-1 detection runs ocr() on it and
  records the reading for the enclosing class-1 detection. (Presumably
  class 1 is a container and class 2 its painted ID, judging by the
  variable names - confirm against the model's class list.)

  Args:
    frame: The current video frame.
    i: Frame index supplied by the video processor (unused here).

  Returns:
    A copy of frame with bounding boxes and labels drawn on it.
  """
  detections = predict(frame)
  detections = tracker.update_with_detections(detections)

  relevant_detections = detections[(detections.class_id == 1) | (detections.class_id == 2)]
  container_detections = detections[detections.class_id == 1]
  id_detections = detections[detections.class_id == 2]

  # Iterating detections yields tuples; index 0 is the box and index 4 is
  # the value used as the tracking key below.
  for id_detection in id_detections:
    id_box = id_detection[0]
    for container_detection in container_detections:
      # Only consider IDs that sit within this container.
      if not check_within(id_box, container_detection[0]):
        continue

      parent_container_id = container_detection[4]

      # Once a container's ID has reached consensus, stop re-reading it.
      if container_ids_tracker.winner(parent_container_id):
        continue

      ocr_result = ocr(frame, id_box, id_detection[4])
      # Skip empty reads so "" can never become a container's ID.
      if ocr_result:
        container_ids_tracker.add_candidate(parent_container_id, ocr_result)

  container_labels = []
  for d in container_detections:
    container_id_top = container_ids_tracker.get_top_candidate(d[4])
    if container_id_top is None:
      container_labels.append("Container Unknown")
    else:
      container_labels.append(f"Container #{container_id_top[0]}")

  id_labels = []
  for d in id_detections:
    top_id_ocr = ocr_tracker.get_top_candidate(d[4])
    if top_id_ocr:
      id_labels.append(f"Read As: {top_id_ocr[0]} ({round(top_id_ocr[2])}%)")
    else:
      id_labels.append("Haven't Read")

  annotated_frame = frame.copy()
  annotated_frame = sv.BoundingBoxAnnotator().annotate(annotated_frame, relevant_detections)
  annotated_frame = sv.LabelAnnotator(
      text_position=sv.Position.CENTER,
      text_scale=1.25,
      text_thickness=2
  ).annotate(annotated_frame, container_detections, container_labels)
  annotated_frame = sv.LabelAnnotator(
      text_scale=0.5
  ).annotate(annotated_frame, id_detections, id_labels)

  # Per-frame progress output: current leading reading for every container.
  print(container_ids_tracker.top_candidates())

  return annotated_frame

# Run video_callback over VIDEO_PATH and write the annotated frames to
# cargo_processed.mp4.
sv.process_video(VIDEO_PATH,"cargo_processed.mp4",video_callback)

# Hand the processed video to the browser as a download (Colab only).
from google.colab import files
files.download('cargo_processed.mp4')

0
{17: ('1247208', 1, 0.0)}
1
{17: ('1247208', 1, 0.0)}
2
{17: ('247208', 2, 10.0)}
3
{17: ('247208', 3, 20.0)}
4
{17: ('247208', 4, 30.0)}
5
{17: ('247208', 4, 30.0)}
6
{17: ('247208', 5, 40.0)}
7
{17: ('247208', 5, 40.0)}
8
{17: ('247208', 6, 50.0)}
9
{17: ('247208', 7, 60.0)}
10
{17: ('247208', 8, 70.0)}
11
{17: ('247208', 9, 80.0)}
12
{17: ('247208', 10, 90.0)}
13
{17: ('247208', 11, 100.0)}
14
{17: ('247208', 11, 100.0)}
15
{17: ('247208', 11, 100.0)}
16
{17: ('247208', 11, 100.0)}
17
{17: ('247208', 11, 100.0)}
18
{17: ('247208', 11, 100.0)}
19
{17: ('247208', 11, 100.0)}
20
{17: ('247208', 11, 100.0)}
21
{17: ('247208', 11, 100.0)}
22
{17: ('247208', 11, 100.0)}
23
{17: ('247208', 11, 100.0)}
24
{17: ('247208', 11, 100.0)}
25
{17: ('247208', 11, 100.0)}
26
{17: ('247208', 11, 100.0)}
27
{17: ('247208', 11, 100.0)}
28
{17: ('247208', 11, 100.0)}
29
{17: ('247208', 11, 100.0)}
30
{17: ('247208', 11, 100.0)}
31
{17: ('247208', 11, 100.0)}
32
{17: ('247208', 11, 100.0)}
33
{17: ('24

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>