In [1]:
import cv2 as cv
from glob import glob
import os
import random
from ultralytics import YOLO



In [2]:
import ast
import easyocr
import numpy as np
import pandas as pd
import string

In [3]:
# Optional: uncomment the lines below to fine-tune the detector on a custom dataset.
#dataset = 'data/label_test2.yaml'
#backbone = YOLO("yolov8s.pt")  # load a pre-trained model (recommended for training)
#results_train = backbone.train(data=dataset, epochs=120,name='label_test2')

In [5]:
# Load the label detector weights.
rl_model = YOLO('models/best.pt')

# glob returns paths in arbitrary, platform-dependent order; sort them so that
# videos[0] refers to the same file on every run.
videos = sorted(glob('data/video/*.mp4'))


In [9]:
# Module-level EasyOCR reader (English, GPU requested); read_label_digits uses it.
reader = easyocr.Reader(['en'], gpu=True)

In [10]:
def read_label_digits(label_crop):
    """Run OCR on a label crop and return the first digit string found.

    Recognition is restricted to the characters 0-9 through the reader's
    ``allowlist`` argument. Only the first entry of the reader's output is
    used; any further entries are ignored.

    Args:
        label_crop: Image handed unchanged to ``reader.readtext``.

    Returns:
        ``(text, score)`` for the first entry, with spaces stripped from the
        text, or ``(None, None)`` when the reader returns nothing.
    """
    ocr_hits = reader.readtext(label_crop, allowlist ='0123456789')
    if not ocr_hits:
        return None, None

    _box, raw_text, confidence = ocr_hits[0]
    cleaned_text = raw_text.upper().replace(' ', '')
    return cleaned_text, confidence

In [11]:
def write_csv(results, output_path):
    """Write the per-frame label reads to a CSV file.

    Args:
        results: Nested mapping ``{frame_number: {track_id: entry}}``. An
            entry is written only when it has a ``'label'`` mapping that
            contains ``'read_label'``; that mapping must then also provide
            ``'bbox'`` (four values), ``'bbox_score'`` and ``'text_score'``.
        output_path: Destination file; it is opened in ``'w'`` mode.

    The bounding box is serialised as ``[x1 y1 x2 y2]`` with spaces, so it
    stays inside a single comma-separated field.
    """
    row_template = '{},{},{},{},{},{}\n'
    header = row_template.format(
        'frame_number', 'track_id', 'label_bbox', 'label_bbox_score', 'read_label',
        'text_score')

    # The context manager closes the file; no explicit close() is required.
    with open(output_path, 'w') as f:
        f.write(header)

        for frame_number, tracks in results.items():
            for track_id, entry in tracks.items():
                if 'label' not in entry:
                    continue
                label = entry['label']
                if 'read_label' not in label:
                    continue

                bbox = label['bbox']
                bbox_field = '[{} {} {} {}]'.format(bbox[0], bbox[1], bbox[2], bbox[3])
                f.write(row_template.format(
                    frame_number,
                    track_id,
                    bbox_field,
                    label['bbox_score'],
                    label['read_label'],
                    label['text_score']))

In [12]:
# Detect and track labels in the first MAX_FRAMES frames of the first video,
# OCR every confident detection and save the successful reads to ./results.csv.
MAX_FRAMES = 1400        # frames with an index >= MAX_FRAMES are not processed
MIN_BBOX_SCORE = 0.5     # detections must score strictly above this value
OCR_DEBUG_DIR = './outputs/images/'

# cv.imwrite does not create missing directories (it just reports failure),
# so make sure the debug-image folder exists before the loop starts.
os.makedirs(OCR_DEBUG_DIR, exist_ok=True)

results = {}
video = cv.VideoCapture(videos[0])
ret = True
frame_number = -1

try:
    while ret:
        frame_number += 1
        if frame_number >= MAX_FRAMES:
            # Stop here instead of decoding the rest of the video for nothing.
            break

        ret, frame = video.read()
        if not ret:
            break

        results[frame_number] = {}
        frame_height, frame_width = frame.shape[:2]

        # label detector
        detections = rl_model.track(frame, persist=True, verbose=False)[0]
        for detection in detections.boxes.data.tolist():
            # Rows with fewer than 7 values cannot be unpacked below
            # (presumably detections without a track id); skip them.
            if len(detection) < 7:
                continue

            x1, y1, x2, y2, track_id, score, _ = detection
            if not score > MIN_BBOX_SCORE:
                continue

            # Clamp the box to the frame so that a negative or oversized
            # coordinate cannot wrap the slice around or yield an empty crop
            # (cv.resize raises on empty input).
            left, top = max(int(x1), 0), max(int(y1), 0)
            right, bottom = min(int(x2), frame_width), min(int(y2), frame_height)
            if right <= left or bottom <= top:
                continue

            # crop label from region of interest
            crop_label = frame[top:bottom, left:right]

            # enlarge and make digits clearer
            enlarged = cv.resize(crop_label, (0, 0), fx=5, fy=5, interpolation=cv.INTER_CUBIC)
            gray = cv.cvtColor(enlarged, cv.COLOR_BGR2GRAY)
            label_thresh = cv.adaptiveThreshold(
                cv.medianBlur(gray, 7), 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, 11, 2)

            # The track id is part of the file name so that several labels in
            # the same frame do not overwrite each other's debug image.
            cv.imwrite(
                '{}{}_{}_to_read.jpg'.format(OCR_DEBUG_DIR, frame_number, int(track_id)),
                label_thresh)

            # OCR
            rl_text, rl_score = read_label_digits(label_thresh)

            # if the label could be read, store the result
            if rl_text is not None:
                results[frame_number][track_id] = {
                    'label': {
                        'bbox': [x1, y1, x2, y2],
                        'bbox_score': score,
                        'read_label': rl_text,
                        'text_score': rl_score}}
finally:
    # Release the capture even when detection or OCR raises.
    video.release()

write_csv(results, './results.csv')

In [13]:
def draw_border(img, top_left, bottom_right, color=(0, 255, 0), thickness=6, line_length_x=200, line_length_y=200):
    """Draw an L-shaped marker at each corner of a rectangle.

    Args:
        img: Image passed to ``cv.line``; the same object is returned.
        top_left: ``(x1, y1)`` corner of the rectangle.
        bottom_right: ``(x2, y2)`` corner of the rectangle.
        color: Line colour passed to ``cv.line``.
        thickness: Line thickness passed to ``cv.line``.
        line_length_x: Length of the horizontal arm of every marker.
        line_length_y: Length of the vertical arm of every marker.

    Returns:
        ``img``.
    """
    left, top = top_left
    right, bottom = bottom_right

    # One row per corner: the corner point followed by the end points of its
    # two arms, listed in the order in which they are drawn.
    corner_arms = (
        ((left, top), (left, top + line_length_y), (left + line_length_x, top)),              # top-left
        ((left, bottom), (left, bottom - line_length_y), (left + line_length_x, bottom)),     # bottom-left
        ((right, top), (right - line_length_x, top), (right, top + line_length_y)),           # top-right
        ((right, bottom), (right, bottom - line_length_y), (right - line_length_x, bottom)),  # bottom-right
    )

    for corner, first_end, second_end in corner_arms:
        cv.line(img, corner, first_end, color, thickness)
        cv.line(img, corner, second_end, color, thickness)

    return img

In [14]:
# Re-open the first video and prepare the writer for the annotated copy.
video = cv.VideoCapture(videos[0])

# get video dims (named properties instead of the bare ids 3 and 4)
frame_width = int(video.get(cv.CAP_PROP_FRAME_WIDTH))
frame_height = int(video.get(cv.CAP_PROP_FRAME_HEIGHT))
size = (frame_width, frame_height)

# Write at the source frame rate so the output plays at the original speed;
# fall back to 20 fps when the capture reports no frame rate (0).
fps = video.get(cv.CAP_PROP_FPS) or 20.0

# Make sure the target folder exists before the writer is opened.
os.makedirs('./outputs', exist_ok=True)

# Define the codec and create VideoWriter object
fourcc = cv.VideoWriter_fourcc(*'DIVX')
out = cv.VideoWriter('./outputs/processed.avi', fourcc, fps, size)

# reset video before you re-run cell below
frame_number = -1
video.set(cv.CAP_PROP_POS_FRAMES, 0)

True

In [15]:
# Draw every stored label read onto its frame and write the annotated video.
# Uses `video`, `out` and `frame_number` as prepared by the writer-setup cell.
ret = True

# Keep read_label as text: with dtype inference pandas parses digit-only
# strings as integers, which drops leading zeros (e.g. "0123" -> 123).
results = pd.read_csv('./results.csv', dtype={'read_label': str})

try:
    while ret:
        ret, frame = video.read()
        frame_number += 1
        if not ret:
            break

        df_ = results[results['frame_number'] == frame_number]
        for row in df_.itertuples(index=False):
            # label_bbox is stored as "[x1 y1 x2 y2]" (whitespace separated).
            label_x1, label_y1, label_x2, label_y2 = (
                float(value) for value in row.label_bbox.strip('[]').split())
            label_text = str(row.read_label)

            # region of interest
            cv.rectangle(
                frame,
                (int(label_x1), int(label_y1)),
                (int(label_x2), int(label_y2)),
                (0, 0, 255),
                6)

            # write detected number, centred horizontally above the box
            (text_width, text_height), _ = cv.getTextSize(
                label_text,
                cv.FONT_HERSHEY_SIMPLEX,
                2,
                6)

            cv.putText(
                frame,
                label_text,
                (int((label_x2 + label_x1 - text_width)/2), int(label_y1 - text_height)),
                cv.FONT_HERSHEY_SIMPLEX,
                2,
                (0, 255, 0),
                6
            )

        out.write(frame)
finally:
    # Release writer and capture even if drawing fails, so the output file is
    # finalised and the input is not left open.
    out.release()
    video.release()