In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import easyocr                                  # We will use Deeplearning based OCR => EasyOCR
import re                                       # Regex = Regular Expression
from collections import defaultdict, deque

In [3]:
# Load the fine-tuned YOLO weights from disk
WEIGHTS_FILE = "license_plate_best.pt"
model = YOLO(WEIGHTS_FILE)

In [None]:
# Create the OCR reader (EasyOCR readers are built for a specific list of languages)
ocr_languages = ["en"]  # English only
reader = easyocr.Reader(ocr_languages, gpu=True)


In [None]:
def plate_preprocess(plate_image):
    """Pre-process a cropped plate image and return the text the OCR reads from it.

    The crop is converted to greyscale, blurred, binarised with an adaptive
    threshold and upscaled 3x before being handed to the module-level
    ``reader``.

    Args:
        plate_image: BGR crop of the plate as a NumPy array (a slice of a frame).

    Returns:
        The first text item returned by the OCR, stripped, upper-cased and with
        spaces removed. Returns "" when the crop is missing or empty, when
        nothing is recognised, or when the OCR call fails.
    """
    # Nothing to read: a missing crop or a zero-area slice
    if plate_image is None or plate_image.size == 0:
        return ""

    # Greyscale conversion for OCR
    grey_image = cv2.cvtColor(plate_image, cv2.COLOR_BGR2GRAY)

    # Gaussian blur to reduce noise before thresholding
    blurred = cv2.GaussianBlur(grey_image, (3, 3), 0)

    # Adaptive thresholding works better than a fixed threshold
    thres = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # Upscale to put more emphasis on the plate; resizing adds new pixels,
    # so cubic interpolation is used to smooth the image out
    plate_resized = cv2.resize(thres, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)

    try:
        ocr_result = reader.readtext(
            plate_resized,
            detail=0,                                          # only the recognised text of each item
            paragraph=True,                                    # combine the items into one
            allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890',  # plates are mostly capitals and digits
        )
        if ocr_result:
            return ocr_result[0].strip().upper().replace(" ", "")
    except Exception as exc:
        # Best effort: one unreadable crop must not abort the whole video.
        # Only Exception is caught so that KeyboardInterrupt / SystemExit still
        # propagate and the surrounding loop can actually be stopped.
        import warnings
        warnings.warn(f"OCR failed on plate crop: {exc!r}", RuntimeWarning, stacklevel=2)

    return ""
        

In [None]:
# Per-track state used to smooth noisy OCR reads across frames
plate_history = defaultdict(lambda: deque(maxlen=12))   # track id -> last 12 accepted reads
plate_final = {}                                        # track id -> text currently reported
plate_confidence = defaultdict(int)                     # track id -> vote count behind plate_final


def get_stable_pattern(track_id, new_text):
    """Record a new OCR read for a track and return that track's stable text.

    Reads that are empty or have fewer than 3 non-space characters are ignored.
    Accepted reads are stripped, upper-cased (no format is enforced) and pushed
    into the track's history window. The most frequent text in the window
    becomes the reported text once it has at least 2 occurrences and, for a
    track that already has a reported text, more occurrences than the count
    stored in ``plate_confidence``.

    Args:
        track_id: Identifier of the tracked plate.
        new_text: Text read from the current frame (may be empty or None).

    Returns:
        The text currently stored in ``plate_final`` for the track, or "".
    """
    # Guard: nothing usable in this read, report whatever we already have
    if not new_text or len(new_text.replace(" ", "")) < 3:
        return plate_final.get(track_id, "")

    candidate = new_text.strip().upper()
    if candidate:
        window = plate_history[track_id]
        window.append(candidate)

        # dict.fromkeys keeps first-seen order, so a tie goes to the text
        # that appears earliest in the window
        leader = max(dict.fromkeys(window), key=window.count)
        votes = window.count(leader)

        if votes >= 2 and (track_id not in plate_final or votes > plate_confidence[track_id]):
            plate_final[track_id] = leader
            plate_confidence[track_id] = votes

    return plate_final.get(track_id, "")

In [None]:
def zoom_overlay(frame, x1, y1, x2, y2, text, track_id):
    """Draw a zoomed copy of the plate and its stable text above the plate.

    The new read is first fed to ``get_stable_pattern``; nothing is drawn until
    that returns a stable text backed by at least 2 matching reads. ``frame``
    is modified in place.

    Args:
        frame: Image array the overlay is drawn onto.
        x1, y1, x2, y2: Integer corners of the plate's bounding box in ``frame``.
        text: OCR text read from the plate in this frame (may be "").
        track_id: Identifier of the tracked plate.
    """
    stable_text = get_stable_pattern(track_id, text)

    # Only draw once we have a stable reading with confidence >= 2
    if not stable_text or plate_confidence[track_id] < 2:
        return

    frame_h, frame_w = frame.shape[:2]
    overlay_h, overlay_w = 150, 400
    font, font_scale = cv2.FONT_HERSHEY_SIMPLEX, 1.5
    label_gap = 10  # pixels between the label baseline and the overlay's top edge

    # putText anchors the text at its bottom-left corner, so the overlay must
    # start low enough to leave the label's full height visible above it.
    # Without this the label lands at a negative y for plates near the top.
    (_, text_h), _ = cv2.getTextSize(stable_text, font, font_scale, 5)
    min_top = text_h + 2 * label_gap

    # Overlay sits above the plate, shifted left if it would run off the right
    # edge, and never starts at a negative column
    oy1 = max(min_top, y1 - overlay_h - 40)
    ox1 = max(0, min(x1, frame_w - overlay_w))
    oy2, ox2 = oy1 + overlay_h, ox1 + overlay_w

    # Clamp the crop to the frame so negative coordinates cannot wrap the slice
    cx1, cy1 = max(0, x1), max(0, y1)
    cx2, cy2 = min(frame_w, x2), min(frame_h, y2)

    # Paste the zoomed plate only if the crop is non-empty and the overlay fits
    overlay_fits = oy2 <= frame_h and ox2 <= frame_w
    if cx2 > cx1 and cy2 > cy1 and overlay_fits:
        plate_crop = frame[cy1:cy2, cx1:cx2]
        # resize returns a new array, so pasting cannot corrupt the crop even
        # when the overlay region overlaps the plate
        frame[oy1:oy2, ox1:ox2] = cv2.resize(plate_crop, (overlay_w, overlay_h))

    # Draw the text with a dark outline underneath for better visibility
    text_origin = (ox1, oy1 - label_gap)
    cv2.putText(frame, stable_text, text_origin, font, font_scale, (0, 0, 0), 5)
    cv2.putText(frame, stable_text, text_origin, font, font_scale, (255, 255, 255), 4)

In [None]:
# Set up the input capture and the output writer, along with the video codec
input_video = "input_video.mp4"
output_video = "output_video.mp4"

cap = cv2.VideoCapture(input_video)
if not cap.isOpened():
    # Fail here instead of silently writing an empty output file later
    raise IOError(f"Could not open input video: {input_video}")

# Output frame size mirrors the capture
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# A capture can report 0 FPS; fall back to an assumed 30 so the writer gets a
# usable rate (adjust if the source's real rate is known)
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video, fourcc, fps, (frame_width, frame_height))

# Minimum detection confidence for a box to be processed
CONF_THRES = 0.3


In [None]:
# Process the video frame by frame: track plates, OCR each confident box,
# annotate the frame and write it out. The try/finally guarantees the capture
# and the writer are released (so the output file is finalised) even when the
# loop is interrupted or a frame raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Video Stream Ended or File not able to read..")
            break

        # Pass the frame to the model; persist=True keeps track ids across frames
        results = model.track(frame, persist=True, verbose=False)

        for r in results:
            boxes = r.boxes

            # No track ids on this result: nothing to associate reads with
            if boxes.id is None:
                continue

            track_ids = boxes.id.int().tolist()
            corners = boxes.xyxy.int().tolist()   # one [x1, y1, x2, y2] per box

            for i, conf in enumerate(boxes.conf):
                # Skip boxes that do not pass CONF_THRES
                if float(conf) <= CONF_THRES:
                    continue

                track_id = track_ids[i]
                x1, y1, x2, y2 = corners[i]

                # Negative coordinates would wrap the slice below
                x1, y1 = max(0, x1), max(0, y1)

                # Crop the plate for the current box and run OCR on it
                plate_crop = frame[y1:y2, x1:x2]
                text = plate_preprocess(plate_crop)

                # Draw a rectangle around the number plate
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)

                # Get the stable text and overlay it above the box
                zoom_overlay(frame, x1, y1, x2, y2, text, track_id)

        out.write(frame)
finally:
    cap.release()
    out.release()

Video Stream Ended or File not able to read..
