## Practice MediaPipe

1. Input continuous images from 'car.mp4'.
2. For each frame, detect every car using the pretrained YOLOv8 model 'yolov8n.pt'. (mark with red rectangles)
3. For each car, detect a licence plate using the YOLOv8 model trained on licence plates, 'license_plate_detector.pt'. (mark with blue rectangle)
4. For each licence plate, OCR using Tesseract.
5. Print the recognized licence plate number above each detected licence plate. (putText() in green color).
6. Use whatever you learned this semester to improve the result
7. Upload your .ipynb file.

In [14]:
import shutil

import cv2
import numpy as np
import pytesseract
from ultralytics import YOLO


In [15]:
# Function to draw bounding boxes and text
def draw_annotations(frame, annotations, color):
    """Draw labelled bounding boxes onto ``frame`` in place.

    Args:
        frame: Image handed to ``cv2.rectangle`` / ``cv2.putText``; it is
            modified in place and nothing is returned.
        annotations: Iterable of ``(x1, y1, x2, y2, text)`` tuples. The
            coordinates are truncated with ``int()`` before drawing; an empty
            or ``None`` ``text`` draws the rectangle only.
        color: Colour tuple for the rectangle outline, in the channel order
            OpenCV uses (callers in this notebook pass ``(0, 0, 255)`` for
            red and ``(255, 0, 0)`` for blue).

    The label itself is always drawn in ``(0, 255, 0)`` (green), regardless of
    ``color``.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.9
    thickness = 2
    label_color = (0, 255, 0)
    gap = 10  # vertical spacing between the box edge and the label baseline

    for x1, y1, x2, y2, text in annotations:
        left, top = int(x1), int(y1)
        cv2.rectangle(frame, (left, top), (int(x2), int(y2)), color, thickness)
        if not text:
            continue

        # putText anchors the text at its bottom-left corner, so the glyphs
        # extend `text_height` pixels above the baseline.
        (_, text_height), _ = cv2.getTextSize(text, font, font_scale, thickness)
        baseline_y = top - gap
        if baseline_y < text_height:
            # Not enough room above the box (it touches the top of the
            # frame): draw the label just inside the top edge instead of
            # letting it be clipped or fall outside the image.
            baseline_y = top + text_height + gap
        cv2.putText(frame, text, (left, baseline_y), font, font_scale, label_color, thickness)

In [16]:
# Specify the Tesseract executable path.
# Prefer the `tesseract` binary found on PATH so the notebook also runs on
# machines where it is not installed under /usr/bin; fall back to the
# original hard-coded location when PATH lookup finds nothing.
pytesseract.pytesseract.tesseract_cmd = shutil.which('tesseract') or '/usr/bin/tesseract'

# Load models
# 'yolov8n.pt' is used below to detect vehicles; 'license_plate_detector.pt'
# is used to detect licence plates. Both files are resolved by ultralytics
# relative to the working directory.
car_detector = YOLO('yolov8n.pt')
license_plate_detector = YOLO('license_plate_detector.pt')

In [19]:
# Load video
cap = cv2.VideoCapture('car.mp4')

# Class IDs from the car detector that are treated as vehicles.
# NOTE(review): these look like the COCO ids for car, motorcycle, bus and
# truck -- confirm against `car_detector.names`.
vehicles = [2, 3, 5, 7]

try:
    # Without this check a missing/unreadable file makes the loop below exit
    # on the first read and the cell finishes silently.
    if not cap.isOpened():
        raise OSError("Could not open video file 'car.mp4'")

    # Read frames
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Detect vehicles: each row of boxes.data is
        # (x1, y1, x2, y2, score, class_id).
        car_detections = car_detector(frame)[0]
        car_annotations = [
            (x1, y1, x2, y2, f'Car: {int(score * 100)}%')
            for x1, y1, x2, y2, score, class_id in car_detections.boxes.data.tolist()
            if int(class_id) in vehicles
        ]

        # Detect license plates
        license_plate_detections = license_plate_detector(frame)[0]
        license_plate_annotations = []
        for license_plate in license_plate_detections.boxes.data.tolist():
            x1, y1, x2, y2, _score, _class_id = license_plate

            # Clamp to non-negative indices: a negative start would be
            # interpreted by NumPy as "count from the end" and crop the
            # wrong region.
            left, top = max(int(x1), 0), max(int(y1), 0)
            right, bottom = max(int(x2), 0), max(int(y2), 0)
            license_plate_crop = frame[top:bottom, left:right, :]

            if license_plate_crop.size == 0:
                # Degenerate (zero-area) box: nothing to OCR, but still draw
                # the rectangle.
                license_plate_text = ''
            else:
                # --psm 8 asks Tesseract to treat the crop as a single word.
                license_plate_text = pytesseract.image_to_string(
                    license_plate_crop, config='--psm 8'
                ).strip()
            license_plate_annotations.append((x1, y1, x2, y2, license_plate_text))

        # Draw annotations
        draw_annotations(frame, car_annotations, (0, 0, 255))  # Red rectangles for cars
        draw_annotations(frame, license_plate_annotations, (255, 0, 0))  # Blue rectangles for license plates

        frame_resized = cv2.resize(frame, (800, 450))

        # Display the frame
        cv2.imshow('Frame', frame_resized)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if detection or OCR raised part-way through,
    # so the capture handle and the display window are not left dangling.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 21 cars, 1 bus, 2 trucks, 121.3ms
Speed: 7.1ms preprocess, 121.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 license_plates, 99.0ms
Speed: 3.2ms preprocess, 99.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 cars, 1 bus, 2 trucks, 110.9ms
Speed: 3.8ms preprocess, 110.9ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 license_plates, 95.9ms
Speed: 2.6ms preprocess, 95.9ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 cars, 1 bus, 2 trucks, 102.8ms
Speed: 3.7ms preprocess, 102.8ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 license_plates, 83.2ms
Speed: 2.4ms preprocess, 83.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 cars, 1 bus, 2 trucks, 82.3ms
Speed: 3.8ms preprocess, 82.3ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 license_pl