# License Plate Detection

In [1]:
# installing missing YOLO dependencies
%pip install lapx>=0.5.2
# installing OCR library
%pip install ultralytics
%pip install mltu

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 23.3.2
[notice] To update, run: c:\Users\avint\.pyenv\pyenv-win\versions\3.10.11\python.exe -m pip install --upgrade pip






[notice] A new release of pip is available: 23.0.1 -> 23.3.2
[notice] To update, run: c:\Users\avint\.pyenv\pyenv-win\versions\3.10.11\python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 23.3.2
[notice] To update, run: c:\Users\avint\.pyenv\pyenv-win\versions\3.10.11\python.exe -m pip install --upgrade pip


We are importing the following libraries:
* **ast** for parsing the bounding boxes
* **cv2** for video processing
* **easyocr** for OCR
* **glob** for finding files
* **numpy** for array operations
* **pandas** for dataframes
* **string** for string operations
* **ultralytics** for **YOLO** for object detection

In [2]:
import ast
import cv2 as cv
from glob import glob
import numpy as np
import pandas as pd
import string
from ultralytics import YOLO
from mltu.inferenceModel import OnnxInferenceModel
from mltu.utils.text_utils import ctc_decoder, get_cer

## License Plate Detection

**YOLOv8** is capable of detecting cars, buses and trucks very easily without additional trainings from the dataset.It is already trained from the COCO dataset.But license number plates seem to be a bit harder. The model often confuses street signs or just basic backgound noise as a car registration plate. 
<br/>
<br/>
To make things more efficient, we are combining both models - a regular COCO trained YOLOv8 and our number plate detector.If the COCO model spots a car, we will then execute the number plate detector to focus its search within the area marked out by the first model's bounding box. That way, we are only seaarching for number plates when there is a car in the picture.

This is a regular COCO trained YOLOv8 model for car detection.<br/>
`coco_model = YOLO('yolov8n.pt')`

This is our custom model trained on the License Plate Dataset.<br/>
`np_model = YOLO('../model/runs/detect/train/weights/best.pt')`

*best.pt* weight is produced by training our model with +21000 annoted images of license plates for 3 epochs.

In [3]:
# Here we are mapping dictionaries for character conversion.
# If we know that the first character in the number plate always is an string eg. `O` then if our OCR reader reads that O as `0` then it would be a mistake.
# To prevent this we are mapping dictionaries with similar keys and values.

dict_char_to_int = {'O': '0',
                    'I': '1',
                    'J': '3',
                    'A': '4',
                    'G': '6',
                    'S': '5'}

dict_int_to_char = {'0': 'O',
                    '1': 'I',
                    '3': 'J',
                    '4': 'A',
                    '6': 'G',
                    '5': 'S'}

In [4]:
# license_complies_format is a function that checks if the license plate complies with the specified format.
# In this case the format is `[A-Z][A-Z][0-9][0-9][A-Z][A-Z][A-Z]`.
# We can change this format for specific use cases. For example, now it is configured for UK number plates. We can change the format according to Nepali number plates for our use.
# The above character conversion comes handy in this situation where if we are sure that in the second letter of our text we should get a string then if our OCR Reader reads a integer that looks similar to a alphabet maybe 4 then we can neglect the '4 and read 'A' instead.
def license_complies_format(text):
    # It returnsTrue if the license plate complies with the format, False otherwise.
    if len(text) != 7:
        return False

    if (text[0] in string.ascii_uppercase or text[0] in dict_int_to_char.keys()) and \
       (text[1] in string.ascii_uppercase or text[1] in dict_int_to_char.keys()) and \
       (text[2] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[2] in dict_char_to_int.keys()) and \
       (text[3] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] or text[3] in dict_char_to_int.keys()) and \
       (text[4] in string.ascii_uppercase or text[4] in dict_int_to_char.keys()) and \
       (text[5] in string.ascii_uppercase or text[5] in dict_int_to_char.keys()) and \
       (text[6] in string.ascii_uppercase or text[6] in dict_int_to_char.keys()):
        return True
    else:
        return False

In [5]:
def format_license(text):
    license_plate_ = ''
    mapping = {0: dict_int_to_char, 1: dict_int_to_char, 4: dict_int_to_char, 5: dict_int_to_char, 6: dict_int_to_char,
               2: dict_char_to_int, 3: dict_char_to_int}
    for j in [0, 1, 2, 3, 4, 5, 6]:
        if text[j] in mapping[j].keys():
            license_plate_ += mapping[j][text[j]]
        else:
            license_plate_ += text[j]

    return license_plate_

In [6]:
class ImageToWordModel(OnnxInferenceModel):
    def __init__(self, char_list, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.char_list = char_list

    def predict(self, image):
        image = cv.resize(image, self.input_shape[:2][::-1])
        image_pred = np.expand_dims(image, axis=0).astype(np.float32)
        preds = self.model.run(None, {self.input_name: image_pred})[0]
        text = ctc_decoder(preds, self.char_list)[0]
        text = text.replace(" ", "")
        text = text.replace("_", "")
        text = text.upper()
        if license_complies_format(text):
            # bring text into the default license plate format
            return format_license(text)
        
        return None

In [7]:
image_to_word_model = ImageToWordModel(model_path="C:/Users/avint/Desktop/model.onnx", char_list="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")

In [8]:
coco_model = YOLO('yolov8n.pt')
np_model = YOLO('../model/runs/detect/train/weights/best.pt')

The input video is read by glob. Glob is a function that returns all the pathnames matching a pattern.

In [9]:
videos = glob('./inputs/sample.mp4')
print(videos)

['./inputs/sample.mp4']


### STEP 1 Implementing the Car Detection

Get the bounding boxes of all vehicles in our video recording with prediction confidence score and object tracking ID

This code currently gathers all the bounding boxes for vehicles in the video and stores them in the `vehicle_bounding_boxes` list. Along with the bounding box coordinates, this list also includes the tracking ID assigned to each identified vehicle. The tracking ID remains consistent from frame to frame, serving as a unique identifier. Additionally, the score indicates the model's confidence level that the particular bounding box indeed contains a vehicle, with values ranging from 0 to 1.

### STEP 2 Implementing the License Plate Detection

Use the bounding box for each vehicle and use the number plate detector model to try to find the corresponding plate within in the confinement of those boxes.

### STEP 3 Preprocess License Plates

### STEP 4 Read License Plates

In [10]:
# write_csv is a function that writes the obtained results to a CSV file using the specified format.
# Here we are formatting the colunms as [frame_number, track_id, car_bbox, car_bbox_score, license_plate_bbox, license_plate_bbox_score, license_plate_number, license_text_score].
# car_bbox and license_plate_bbox has 4 array that stores the coordinate of the bounding box.

def write_csv(results, output_path):
    
    with open(output_path, 'w') as f:
        f.write('{},{},{},{},{},{},{},{}\n'.format(
            'frame_number', 'track_id', 'car_bbox', 'car_bbox_score',
            'license_plate_bbox', 'license_plate_bbox_score', 'license_plate_number',
            'license_text_score'))

        for frame_number in results.keys():
            for track_id in results[frame_number].keys():
                print(results[frame_number][track_id])
                if 'car' in results[frame_number][track_id].keys() and \
                   'license_plate' in results[frame_number][track_id].keys() and \
                   'number' in results[frame_number][track_id]['license_plate'].keys():
                    f.write('{},{},{},{},{},{},{},{}\n'.format(
                        frame_number,
                        track_id,
                        '[{} {} {} {}]'.format(
                            results[frame_number][track_id]['car']['bbox'][0],
                            results[frame_number][track_id]['car']['bbox'][1],
                            results[frame_number][track_id]['car']['bbox'][2],
                            results[frame_number][track_id]['car']['bbox'][3]
                        ),
                        results[frame_number][track_id]['car']['bbox_score'],
                        '[{} {} {} {}]'.format(
                            results[frame_number][track_id]['license_plate']['bbox'][0],
                            results[frame_number][track_id]['license_plate']['bbox'][1],
                            results[frame_number][track_id]['license_plate']['bbox'][2],
                            results[frame_number][track_id]['license_plate']['bbox'][3]
                        ),
                        results[frame_number][track_id]['license_plate']['bbox_score'],
                        results[frame_number][track_id]['license_plate']['number'],
                        results[frame_number][track_id]['license_plate']['text_score'])
                    )
        f.close()

### STEP 5 Clean-Up License Plate Format

This returns a list with bounding box metrics for every frame with a successful detection.

In [12]:
results = {}

# read video by index
video = cv.VideoCapture(videos[0])

ret = True
frame_number = -1
vehicles = [2,3,5]

# read the entire video
while ret:
    ret, frame = video.read()
    frame_number += 1
    if ret:
        results[frame_number] = {}
        
        # vehicle detector
        detections = coco_model.track(frame, persist=True)[0]
        for detection in detections.boxes.data.tolist():
            x1, y1, x2, y2, track_id, score, class_id = detection
            if int(class_id) in vehicles and score > 0.5:
                vehicle_bounding_boxes = []
                vehicle_bounding_boxes.append([x1, y1, x2, y2, track_id, score])
                for bbox in vehicle_bounding_boxes:
                    print(bbox)
                    roi = frame[int(y1):int(y2), int(x1):int(x2)]
                    
                    # license plate detector for region of interest
                    license_plates = np_model(frame)[0]
                    # process license plate
                    for license_plate in license_plates.boxes.data.tolist():
                        plate_x1, plate_y1, plate_x2, plate_y2, plate_score, _ = license_plate
                        # crop plate from region of interest
                        # plate = roi[int(plate_y1+30):int(plate_y2-30), int(plate_x1+10):int(plate_x2-10)]
                        plate = roi[int(plate_y1):int(plate_y2), int(plate_x1):int(plate_x2)]
                        # cv.imwrite('outputs/plates/roi/'+str(track_id)+ '.jpg', plate)
                        prediction_text = image_to_word_model.predict(plate)
                        # if plate could be read write results
                        if prediction_text is not None:
                            results[frame_number][track_id] = {
                                'car': {
                                    'bbox': [x1, y1, x2, y2],
                                    'bbox_score': score
                                },
                                'license_plate': {
                                    'bbox': [plate_x1, plate_y1, plate_x2, plate_y2],
                                    'bbox_score': plate_score,
                                    'number': prediction_text,
                                    'text_score': 1.0
                                }
                            }

write_csv(results, './outputs/resultsIMG.csv')
video.release()




0: 384x640 2 persons, 3 cars, 71.8ms
Speed: 2.8ms preprocess, 71.8ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)
[885.9718017578125, 259.9759216308594, 1918.0755615234375, 1069.1480712890625, 1.0, 0.9230321049690247]

0: 384x640 1 License_Plate, 66.9ms
Speed: 4.2ms preprocess, 66.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)
[0.256805419921875, 368.2144470214844, 288.8199768066406, 804.2828369140625, 2.0, 0.8570149540901184]

0: 384x640 1 License_Plate, 74.3ms
Speed: 1.3ms preprocess, 74.3ms inference, 5.7ms postprocess per image at shape (1, 3, 384, 640)


error: OpenCV(4.9.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'


In [None]:
results = pd.read_csv('./outputs/resultsIMG.csv')

# show results for tracking ID `1` - sort by OCR prediction confidence
results[results['track_id'] == 1.].sort_values(by='license_text_score', ascending=False)

Unnamed: 0,frame_number,track_id,car_bbox,car_bbox_score,license_plate_bbox,license_plate_bbox_score,license_plate_number,license_text_score


### STEP 6 Visualize the Results

In [None]:
def draw_border(img, top_left, bottom_right, color=(0, 255, 0), thickness=6, line_length_x=200, line_length_y=200):
    x1, y1 = top_left
    x2, y2 = bottom_right

    cv.line(img, (x1, y1), (x1, y1 + line_length_y), color, thickness)  #-- top-left
    cv.line(img, (x1, y1), (x1 + line_length_x, y1), color, thickness)

    cv.line(img, (x1, y2), (x1, y2 - line_length_y), color, thickness)  #-- bottom-left
    cv.line(img, (x1, y2), (x1 + line_length_x, y2), color, thickness)

    cv.line(img, (x2, y1), (x2 - line_length_x, y1), color, thickness)  #-- top-right
    cv.line(img, (x2, y1), (x2, y1 + line_length_y), color, thickness)

    cv.line(img, (x2, y2), (x2, y2 - line_length_y), color, thickness)  #-- bottom-right
    cv.line(img, (x2, y2), (x2 - line_length_x, y2), color, thickness)

    return img

In [None]:
# read video by index
video = cv.VideoCapture(videos[0])

# get video dims
frame_width = int(video.get(3))
frame_height = int(video.get(4))
size = (frame_width, frame_height)

# Define the codec and create VideoWriter object
fourcc = cv.VideoWriter_fourcc(*'mp4v')
out = cv.VideoWriter('./outputs/processedIMG.mp4', fourcc, 20.0, size)

# reset video before you re-run cell below
frame_number = -1
video.set(cv.CAP_PROP_POS_FRAMES, 0)

True

In [None]:
ret = True

while ret:
    ret, frame = video.read()
    frame_number += 1
    if ret:
        df_ = results[results['frame_number'] == frame_number]
        for index in range(len(df_)):
            # draw car
            vhcl_x1, vhcl_y1, vhcl_x2, vhcl_y2 = ast.literal_eval(df_.iloc[index]['car_bbox'].replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))
            
            draw_border(
                frame, (int(vhcl_x1), int(vhcl_y1)),
                (int(vhcl_x2), int(vhcl_y2)), (0, 255, 0),
                12, line_length_x=200, line_length_y=200)
            
            # draw license plate
            plate_x1, plate_y1, plate_x2, plate_y2 = ast.literal_eval(df_.iloc[index]['license_plate_bbox'].replace('[ ', '[').replace('   ', ' ').replace('  ', ' ').replace(' ', ','))

            # region of interest for license plate
            roi = frame[int(vhcl_y1):int(vhcl_y2), int(vhcl_x1):int(vhcl_x2)]
            cv.rectangle(roi, (int(plate_x1), int(plate_y1)), (int(plate_x2), int(plate_y2)), (0, 0, 255), 6)
            #endregion
            # write detected number
            (text_width, text_height), _ = cv.getTextSize(
                df_.iloc[index]['license_plate_number'],
                cv.FONT_HERSHEY_SIMPLEX,
                2,
                6)

            cv.putText(
                frame,
                df_.iloc[index]['license_plate_number'],
                (int((vhcl_x2 + vhcl_x1 - text_width)/2), int(vhcl_y1 - text_height)),
                cv.FONT_HERSHEY_SIMPLEX,
                2,
                (0, 255, 0),
                6
            )

        out.write(frame)
        frame = cv.resize(frame, (1280, 720))

out.release()
video.release()

In [None]:
import pandas as pd
import numpy as np

# Assuming your input data is stored in a CSV file named 'data.csv'
# You can adjust the file name or provide the data directly if it's not in a file
data = pd.read_csv('./outputs/resultsIMG.csv')

# Convert 'license_text_score' to numeric
data['license_text_score'] = pd.to_numeric(data['license_text_score'], errors='coerce')

# Calculate the total sum of license_text_score for each license_plate_number
total_license_score = data.groupby('license_plate_number')['license_text_score'].sum()

# Find the row with the maximum license_plate_score for each license_plate_number
max_license_score_row = data.loc[data.groupby('license_plate_number')['license_text_score'].idxmax()]

# Merge the two DataFrames on license_plate_number
result = pd.merge(max_license_score_row[['license_plate_number', 'track_id']], total_license_score.reset_index(),
                  on='license_plate_number', how='inner')

# Find the row with the maximum license_text_score for each track_id
max_license_score_row = result.loc[result.groupby('track_id')['license_text_score'].idxmax()]

# Display the result
print(max_license_score_row)


  license_plate_number  track_id  license_text_score
3              TE60EIT      23.0                 2.0
2              RO05AMR      35.0                 1.0
0              IS55ESM      51.0                 2.0
