In [None]:
# Install the third-party packages this notebook imports.
# NOTE(review): no versions are pinned. The recorded output below resolved
# ultralytics 8.3.50, inference-sdk 0.31.1, easyocr 1.7.2 and supervision 0.25.1;
# consider pinning those so a fresh run reproduces the same environment.
!pip install opencv-python-headless numpy ultralytics inference-sdk easyocr supervision tensorflow

Collecting ultralytics
  Downloading ultralytics-8.3.50-py3-none-any.whl.metadata (35 kB)
Collecting inference-sdk
  Downloading inference_sdk-0.31.1-py3-none-any.whl.metadata (11 kB)
Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting supervision
  Downloading supervision-0.25.1-py3-none-any.whl.metadata (14 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Collecting dataclasses-json~=0.6.0 (from inference-sdk)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pillow>=7.1.2 (from ultralytics)
  Downloading pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.2 kB)
Collecting aiohttp<=3.10.11,>=3.9.0 (from inference-sdk)
  Downloading aiohttp-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting backoff~=2.2.0 (from inference-sdk)
  Downloading backoff-2.2.1-py3-none-any.whl.metadat

In [None]:
# Replace the torch build already present in the runtime with torch 2.0.1.
# NOTE(review): the recorded output of the later `pip install --upgrade ultralytics`
# cell shows torch 2.5.1 being downloaded again, so this pin may not survive a
# top-to-bottom run - confirm which torch version is actually intended.
!pip uninstall -y torch
!pip install torch==2.0.1

Found existing installation: torch 2.5.1+cu121
Uninstalling torch-2.5.1+cu121:
  Successfully uninstalled torch-2.5.1+cu121
Collecting torch==2.0.1
  Downloading torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl.metadata (24 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.1)
  Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.1)
  Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cuda-cupti-cu11==11.7.101 (from torch==2.0.1)
  Downloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu11==8.5.0.96 (from torch==2.0.1)
  Downloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu11==11.10.3.66 (from torch==2.0.1)
  Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl.metadata

In [None]:
# Upgrade ultralytics to the newest available release.
# NOTE(review): the recorded output shows this step collecting torch 2.5.1 to satisfy
# `torch>=1.8.0`, which presumably replaces the torch==2.0.1 installed by the
# previous cell - confirm whether that pin is still needed.
!pip install --upgrade ultralytics

Collecting torch>=1.8.0 (from ultralytics)
  Downloading torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-c

In [None]:
import os
import cv2
import numpy as np
import supervision as sv
import tensorflow as tf
import easyocr
import io
from PIL import Image
from collections import defaultdict, deque
from ultralytics import YOLO
from google.colab.patches import cv2_imshow
from inference_sdk import InferenceHTTPClient
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

In [None]:
# Pipeline configuration: file locations and detection thresholds.
SOURCE_VIDEO_PATH = "vehicles.mp4"  # video that is read frame by frame
OUTPUT_VIDEO_PATH = "output.mp4"  # annotated video produced by the processing loop
CONFIDENCE_THRESHOLD = 0.5  # minimum detection score; also the tracker activation threshold
IOU_THRESHOLD = 0.4  # overlap threshold handed to non-maximum suppression

In [None]:
# Corner correspondences for the perspective transform.
# SOURCE: quadrilateral in frame coordinates (it may extend past the frame edges).
SOURCE = np.array([[1252, 787], [2298, 803], [5039, 2159], [-550, 2159]])

# Size of the rectangle SOURCE is mapped onto.
# NOTE(review): the unit of these values is not stated anywhere in the notebook - confirm.
TARGET_WIDTH = 25
TARGET_HEIGHT = 250

# Unit square corners (same order as SOURCE) scaled to the last valid index
# along each axis, i.e. (width - 1, height - 1).
TARGET = np.array([[0, 0], [1, 0], [1, 1], [0, 1]]) * np.array(
    [TARGET_WIDTH - 1, TARGET_HEIGHT - 1]
)

In [None]:
# Perspective correction helper
class ViewTransformer:
    """Project 2-D points through the perspective transform that maps `source` onto `target`."""

    def __init__(self, source: np.ndarray, target: np.ndarray) -> None:
        """Build the transform matrix from two corner arrays.

        Both arrays are cast to float32 before being handed to
        ``cv2.getPerspectiveTransform``; the resulting matrix is kept in ``self.m``.
        """
        self.m = cv2.getPerspectiveTransform(
            source.astype(np.float32),
            target.astype(np.float32),
        )

    def transform_points(self, points: np.ndarray) -> np.ndarray:
        """Return `points` mapped through the stored transform.

        An empty array is handed back untouched; otherwise the result is a
        float32 array reshaped to ``(-1, 2)``.
        """
        if points.size != 0:
            # cv2.perspectiveTransform is fed the points reshaped to (-1, 1, 2).
            stacked = points.reshape(-1, 1, 2).astype(np.float32)
            return cv2.perspectiveTransform(stacked, self.m).reshape(-1, 2)
        return points

In [None]:
# Load everything the pipeline relies on: video metadata, the vehicle detector,
# the tracker, the plate-type classifier and the OCR reader.
video_info = sv.VideoInfo.from_video_path(video_path=SOURCE_VIDEO_PATH)

# Vehicle detector; the weights file is downloaded automatically when it is missing.
model = YOLO("yolov8n.pt")

# Tracker configured with the video frame rate and the detection confidence threshold.
byte_track = sv.ByteTrack(
    track_activation_threshold=CONFIDENCE_THRESHOLD,
    frame_rate=video_info.fps,
)

# Keras model used by `licensetype`, and the English EasyOCR reader used for plate text.
modelcustom = tf.keras.models.load_model("license_plate_classifier2.h5")
reader = easyocr.Reader(["en"])



In [None]:
# Drawing helpers, all sized from the video resolution.
thickness = sv.calculate_optimal_line_thickness(
    resolution_wh=video_info.resolution_wh
)
text_scale = sv.calculate_optimal_text_scale(
    resolution_wh=video_info.resolution_wh
)

# Bounding boxes.
box_annotator = sv.BoxAnnotator(thickness=thickness)

# Text labels anchored at the bottom centre of each box.
label_annotator = sv.LabelAnnotator(
    text_position=sv.Position.BOTTOM_CENTER,
    text_thickness=thickness,
    text_scale=text_scale,
)

# Motion trails; the length is two times the frame rate, i.e. about two seconds of frames.
trace_annotator = sv.TraceAnnotator(
    position=sv.Position.BOTTOM_CENTER,
    trace_length=video_info.fps * 2,
    thickness=thickness,
)

In [None]:
# Both objects are built from the same SOURCE quadrilateral:
# the transformer maps it onto TARGET, the zone is used to filter detections.
view_transformer = ViewTransformer(target=TARGET, source=SOURCE)
polygon_zone = sv.PolygonZone(polygon=SOURCE)

In [None]:
# Initialize Roboflow client for license plate detection.
# The API key is read from the environment so that it is never stored in the
# notebook. Any key that was previously committed in this cell should be
# treated as leaked and rotated.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY")
if not ROBOFLOW_API_KEY:
    raise RuntimeError(
        "ROBOFLOW_API_KEY is not set. Export it (or set os.environ['ROBOFLOW_API_KEY']) "
        "before running this cell."
    )

client = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=ROBOFLOW_API_KEY,
)
license_plate_model_id = "license-plate-recognition-rxg4e/6"

# Per-tracker history of transformed y coordinates, capped at one entry per frame of a second.
coordinates = defaultdict(lambda: deque(maxlen=video_info.fps))

# Generate frames from the input video (a generator: it can be iterated only once).
frame_generator = sv.get_video_frames_generator(source_path=SOURCE_VIDEO_PATH)

In [None]:
def matricule(frame):
    """Run the Roboflow license-plate model on `frame`.

    Returns whatever ``client.infer`` returns. Any exception raised during the
    call is reported on stdout and turned into a ``None`` result, so callers
    must check for ``None``.
    """
    try:
        return client.infer(frame, model_id=license_plate_model_id)
    except Exception as err:
        # Best effort: a failed inference must not abort the whole video run.
        print(f"Error during inference: {err}")
    return None

In [None]:
def matricule_read_tun(cropped_plate, reader):
    """Read the two numeric groups of a plate whose middle section is ignored.

    The crop is enlarged 2x, the central 40%-60% band of its width is dropped,
    and each remaining side is converted to grayscale, blurred and binarised at
    every threshold from 30 to 195 in steps of 5. For each side, the threshold
    whose all-digit OCR hits have the highest mean confidence wins.

    Args:
        cropped_plate: BGR image array of the plate.
        reader: object exposing ``readtext(image)``; each hit is indexed as
            ``hit[1]`` (text) and ``hit[2]`` (confidence).

    Returns:
        ``{"left": {"number", "confidence"}, "right": {"number", "confidence"}}``
        where ``number`` is the first winning hit converted to ``int`` (or
        ``None`` when nothing numeric was read) and ``confidence`` is the best
        mean confidence (0 when nothing was read).
    """
    # Enlarge the crop by a factor of two.
    target_size = (int(cropped_plate.shape[1] * 2), int(cropped_plate.shape[0] * 2))
    enlarged = cv2.resize(cropped_plate, target_size, interpolation=cv2.INTER_CUBIC)

    # Keep the outer 40% on each side; the middle fifth is discarded.
    full_width = enlarged.shape[1]
    sides = {
        "left": enlarged[:, : int(full_width * 0.4)],
        "right": enlarged[:, int(full_width * 0.6):],
    }

    # Grayscale first, then blur, for both sides.
    grey = {name: cv2.cvtColor(part, cv2.COLOR_BGR2GRAY) for name, part in sides.items()}
    smoothed = {name: cv2.GaussianBlur(img, (5, 5), 0) for name, img in grey.items()}

    best_score = {"left": 0, "right": 0}
    best_hits = {"left": None, "right": None}

    for level in range(30, 200, 5):
        # Binarise both sides before running OCR on either of them.
        binary = {
            name: cv2.threshold(img, level, 255, cv2.THRESH_BINARY)[1]
            for name, img in smoothed.items()
        }
        digit_hits = {
            name: [hit for hit in reader.readtext(img) if hit[1].isdigit()]
            for name, img in binary.items()
        }

        for name, hits in digit_hits.items():
            score = sum(hit[2] for hit in hits) / len(hits) if hits else 0
            if score > best_score[name]:
                best_score[name] = score
                best_hits[name] = hits

    def _first_as_int(hits):
        # Only the first hit of the winning threshold is used as the number.
        return int(hits[0][1]) if hits else None

    left_number = _first_as_int(best_hits["left"])
    right_number = _first_as_int(best_hits["right"])

    return {
        "left": {"number": left_number, "confidence": best_score["left"]},
        "right": {"number": right_number, "confidence": best_score["right"]},
    }

In [None]:
def matricule_read_other(cropped_plate, reader):
    """Read a single numeric value from a plate crop.

    The crop is enlarged 2x, converted to grayscale, blurred and binarised at
    every threshold from 30 to 195 in steps of 5. The threshold whose all-digit
    OCR hits have the highest mean confidence wins.

    Args:
        cropped_plate: BGR image array of the plate.
        reader: object exposing ``readtext(image)``; each hit is indexed as
            ``hit[1]`` (text) and ``hit[2]`` (confidence).

    Returns:
        ``{"value": int | None, "confidence": number}`` - ``value`` is the first
        winning hit converted to ``int``; it is ``None`` and ``confidence`` is 0
        when no numeric text was found.
    """
    # Enlarge, then grayscale, then blur.
    target_size = (int(cropped_plate.shape[1] * 2), int(cropped_plate.shape[0] * 2))
    enlarged = cv2.resize(cropped_plate, target_size, interpolation=cv2.INTER_CUBIC)
    smoothed = cv2.GaussianBlur(cv2.cvtColor(enlarged, cv2.COLOR_BGR2GRAY), (5, 5), 0)

    top_score = 0
    top_hits = None

    for level in range(30, 200, 5):
        binary = cv2.threshold(smoothed, level, 255, cv2.THRESH_BINARY)[1]

        # Keep only hits whose text consists solely of digits.
        hits = [hit for hit in reader.readtext(binary) if hit[1].isdigit()]
        if not hits:
            continue

        score = sum(hit[2] for hit in hits) / len(hits)
        if score > top_score:
            top_score, top_hits = score, hits

    # Only the first hit of the winning threshold is reported.
    if top_hits:
        value = int(top_hits[0][1])
    else:
        value = None

    return {"value": value, "confidence": top_score}


In [None]:
def licensetype(image_array, modelcustom):
    """Classify a plate crop with `modelcustom` and return 0 or 1.

    The crop is turned into a PIL image, resized to 128x128, converted to
    grayscale, scaled to [0, 1] and given a batch axis before prediction.

    Returns:
        0 when the first prediction is above 0.5, otherwise 1. The processing
        loop treats 0 as a Tunisian plate.

    NOTE(review): callers pass crops of OpenCV frames, which are presumably
    BGR, while ``array_to_img`` presumably interprets channels as RGB - confirm
    this matches how the classifier's training images were loaded.
    """
    # NumPy array -> PIL image, resized and reduced to a single luminance channel.
    grey = array_to_img(image_array).resize((128, 128)).convert("L")

    # Back to an array, with a leading batch axis, normalised the same way as in training.
    batch = np.expand_dims(img_to_array(grey), axis=0) / 255.0

    score = modelcustom.predict(batch)[0]
    if score > 0.5:
        return 0
    return 1

In [None]:
# Process video frames: detect and track vehicles inside the zone, then locate,
# read and annotate their license plates.
#
# Behavior notes:
# * OCR results are read with the keys the reader functions actually return
#   (`left`/`right` -> `number`/`confidence`, and `value`/`confidence`).
# * The best reading is remembered per tracked vehicle (tracker id), so the label
#   stays visible between the frames on which OCR runs. If one vehicle yields
#   several plate predictions they share that vehicle's label.
# * Crop coordinates are clamped to the frame so negative indices cannot wrap.
# * The cell builds its own frame generator and resets the tracker, so it can be
#   re-run and does not exhaust the shared `frame_generator`.

OCR_EVERY_N_FRAMES = 15  # OCR is expensive, so plates are only read on every 15th frame

# tracker_id -> {"label": str, "confidence": float}
plate_results = {}

# Drop any tracks left over from a previous run of a processing cell.
byte_track.reset()

with sv.VideoSink(OUTPUT_VIDEO_PATH, video_info) as sink:
    frame_count = 0
    for frame in sv.get_video_frames_generator(source_path=SOURCE_VIDEO_PATH):
        frame_count += 1
        frame_height, frame_width = frame.shape[:2]

        # Model inference for vehicle detection
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)

        # Filter detections by confidence threshold
        detections = detections[detections.confidence > CONFIDENCE_THRESHOLD]

        # Trigger zone filter
        detections = detections[polygon_zone.trigger(detections)]

        # Apply Non-Maximum Suppression (NMS)
        detections = detections.with_nms(threshold=IOU_THRESHOLD)

        # Update tracker
        detections = byte_track.update_with_detections(detections=detections)

        # Transform tracking points
        points = detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER)
        points = view_transformer.transform_points(points=points).astype(int)

        for tracker_id, [_, y] in zip(detections.tracker_id, points):
            coordinates[tracker_id].append(y)

        # Start frame annotation
        annotated_frame = frame.copy()

        # License plate detection and annotation, one vehicle at a time
        for tracker_id, bbox in zip(detections.tracker_id, detections.xyxy):
            # Car bounding box, clamped to the frame
            x1, y1, x2, y2 = map(int, bbox)
            x1 = min(max(x1, 0), frame_width)
            x2 = min(max(x2, 0), frame_width)
            y1 = min(max(y1, 0), frame_height)
            y2 = min(max(y2, 0), frame_height)
            cropped_car = frame[y1:y2, x1:x2]

            # Ensure cropped car is valid
            if cropped_car.size == 0:
                continue

            # Detect license plates within the cropped car
            plate_response = matricule(cropped_car)
            if not plate_response or "predictions" not in plate_response:
                continue

            for prediction in plate_response["predictions"]:
                # Plate box is centre-based and relative to cropped_car
                lx, ly = prediction["x"], prediction["y"]
                lwidth, lheight = prediction["width"], prediction["height"]

                # Convert to corner coordinates in the full frame, clamped to the frame
                plate_left = min(max(int(lx - lwidth / 2) + x1, 0), frame_width)
                plate_top = min(max(int(ly - lheight / 2) + y1, 0), frame_height)
                plate_right = min(max(int(lx + lwidth / 2) + x1, 0), frame_width)
                plate_bottom = min(max(int(ly + lheight / 2) + y1, 0), frame_height)

                # Crop license plate from the frame
                cropped_plate = frame[plate_top:plate_bottom, plate_left:plate_right]

                # Validate cropped license plate
                if cropped_plate.size == 0:
                    continue

                if frame_count % OCR_EVERY_N_FRAMES == 0:
                    license_type = licensetype(cropped_plate, modelcustom)

                    if license_type == 0:  # Tunisian plates
                        read_result = matricule_read_tun(cropped_plate, reader)
                        left, right = read_result["left"], read_result["right"]
                        left_number = left["number"] if left["number"] is not None else "Unknown"
                        right_number = right["number"] if right["number"] is not None else "Unknown"
                        label = f"{left_number}تونس{right_number}"
                        # A plate reading is only as reliable as its weaker half.
                        confidence = min(left["confidence"], right["confidence"])
                    else:  # Other plates
                        read_result = matricule_read_other(cropped_plate, reader)
                        plate_value = read_result["value"]
                        if plate_value is None:
                            plate_value = "Unknown"
                        label = f"value: {plate_value}"
                        confidence = read_result["confidence"]

                    # Keep the most confident reading seen so far for this vehicle
                    previous = plate_results.get(tracker_id)
                    if previous is None or confidence > previous["confidence"]:
                        plate_results[tracker_id] = {
                            "label": label,
                            "confidence": confidence,
                        }

                # Annotate license plate on the frame
                plate_label = plate_results.get(tracker_id, {}).get("label", "Unknown")
                license_plate_detections = sv.Detections(
                    xyxy=np.array([[plate_left, plate_top, plate_right, plate_bottom]]),
                    confidence=np.array([1.0]),  # Assign confidence for annotation
                    class_id=np.array([0]),  # Assign a class ID for license plates
                )

                # Draw bounding boxes
                annotated_frame = box_annotator.annotate(
                    scene=annotated_frame,
                    detections=license_plate_detections,
                )

                # Overlay the label text above the bounding box.
                # NOTE(review): cv2.putText's Hershey fonts cannot draw the Arabic part
                # of the Tunisian label (unsupported symbols are rendered as question
                # marks); drawing with PIL and a TrueType font would be needed for that.
                cv2.putText(
                    annotated_frame,
                    plate_label,
                    (plate_left, plate_top - 10),  # Position text above the bounding box
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,  # Font scale
                    (255, 0, 0),  # Font color (blue in BGR)
                    2,  # Thickness
                    cv2.LINE_AA,
                )

        # Write annotated frame to output video
        sink.write_frame(annotated_frame)

NameError: name 'FPS' is not defined

In [None]:
# Process video frames: detect and track vehicles inside the zone, estimate a
# per-vehicle speed value, then locate, read and annotate license plates.
#
# Behavior notes:
# * OCR results are read with the keys the reader functions actually return
#   (`left`/`right` -> `number`/`confidence`, and `value`/`confidence`).
# * The best reading is remembered per tracked vehicle (tracker id), so the label
#   stays visible between the frames on which OCR runs. If one vehicle yields
#   several plate predictions they share that vehicle's label.
# * Crop coordinates are clamped to the frame so negative indices cannot wrap.
# * The cell builds its own frame generator and resets the tracker; the shared
#   `frame_generator` is empty once any earlier loop has consumed it, which would
#   leave the output video without frames.
# * Position history is bounded (one second of frames per vehicle) instead of
#   growing for the whole video.

OCR_EVERY_N_FRAMES = 15  # OCR is expensive, so plates are only read on every 15th frame

plate_results = {}  # tracker_id -> {"label": str, "confidence": float}
coordinates = defaultdict(lambda: deque(maxlen=video_info.fps))  # tracker_id -> recent transformed y
speed_data = {}  # tracker_id -> latest speed value

# Drop any tracks left over from a previous run of a processing cell.
byte_track.reset()

# Process video frames
with sv.VideoSink(OUTPUT_VIDEO_PATH, video_info) as sink:
    frame_count = 0
    for frame in sv.get_video_frames_generator(source_path=SOURCE_VIDEO_PATH):
        frame_count += 1
        frame_height, frame_width = frame.shape[:2]

        # Model inference for vehicle detection
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)

        # Filter detections by confidence threshold
        detections = detections[detections.confidence > CONFIDENCE_THRESHOLD]

        # Trigger zone filter
        detections = detections[polygon_zone.trigger(detections)]

        # Apply Non-Maximum Suppression (NMS)
        detections = detections.with_nms(threshold=IOU_THRESHOLD)

        # Update tracker
        detections = byte_track.update_with_detections(detections=detections)

        # Transform tracking points
        points = detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER)
        points = view_transformer.transform_points(points=points).astype(int)

        for tracker_id, [_, y] in zip(detections.tracker_id, points):
            history = coordinates[tracker_id]
            history.append(y)

            # Speed value: half the change in transformed y between the last two frames.
            # NOTE(review): y lives in the TARGET plane, not in image pixels, and is
            # truncated to an integer above, so the "px/frame" unit in the label and
            # the 0.5 factor should be confirmed.
            if len(history) > 1:
                speed_data[tracker_id] = abs(history[-1] - history[-2]) * 0.5

        # Start frame annotation
        annotated_frame = frame.copy()

        # License plate detection and annotation, one vehicle at a time
        for tracker_id, bbox in zip(detections.tracker_id, detections.xyxy):
            # Car bounding box, clamped to the frame
            x1, y1, x2, y2 = map(int, bbox)
            x1 = min(max(x1, 0), frame_width)
            x2 = min(max(x2, 0), frame_width)
            y1 = min(max(y1, 0), frame_height)
            y2 = min(max(y2, 0), frame_height)
            cropped_car = frame[y1:y2, x1:x2]

            # Ensure cropped car is valid
            if cropped_car.size == 0:
                continue

            # Detect license plates within the cropped car
            plate_response = matricule(cropped_car)
            if not plate_response or "predictions" not in plate_response:
                continue

            for prediction in plate_response["predictions"]:
                # Plate box is centre-based and relative to cropped_car
                lx, ly = prediction["x"], prediction["y"]
                lwidth, lheight = prediction["width"], prediction["height"]

                # Convert to corner coordinates in the full frame, clamped to the frame
                plate_left = min(max(int(lx - lwidth / 2) + x1, 0), frame_width)
                plate_top = min(max(int(ly - lheight / 2) + y1, 0), frame_height)
                plate_right = min(max(int(lx + lwidth / 2) + x1, 0), frame_width)
                plate_bottom = min(max(int(ly + lheight / 2) + y1, 0), frame_height)

                cropped_plate = frame[plate_top:plate_bottom, plate_left:plate_right]
                if cropped_plate.size == 0:
                    continue

                if frame_count % OCR_EVERY_N_FRAMES == 0:
                    # Pass cropped plate to recognition functions
                    license_type = licensetype(cropped_plate, modelcustom)

                    if license_type == 0:  # Tunisian plates
                        read_result = matricule_read_tun(cropped_plate, reader)
                        left, right = read_result["left"], read_result["right"]
                        left_number = left["number"] if left["number"] is not None else "Unknown"
                        right_number = right["number"] if right["number"] is not None else "Unknown"
                        label = f"{left_number}تونس{right_number}"
                        # A plate reading is only as reliable as its weaker half.
                        confidence = min(left["confidence"], right["confidence"])
                    else:  # Other plates
                        read_result = matricule_read_other(cropped_plate, reader)
                        plate_value = read_result["value"]
                        if plate_value is None:
                            plate_value = "Unknown"
                        label = f"value: {plate_value}"
                        confidence = read_result["confidence"]

                    # Keep the most confident reading seen so far for this vehicle
                    previous = plate_results.get(tracker_id)
                    if previous is None or confidence > previous["confidence"]:
                        plate_results[tracker_id] = {
                            "label": label,
                            "confidence": confidence,
                        }

                # Annotate car with speed and license plate
                car_speed = speed_data.get(tracker_id, 0.0)
                plate_label = plate_results.get(tracker_id, {}).get("label", "Unknown")

                full_label = f"ID: {tracker_id} | Speed: {car_speed:.2f} px/frame | {plate_label}"

                license_plate_detections = sv.Detections(
                    xyxy=np.array([[plate_left, plate_top, plate_right, plate_bottom]]),
                    confidence=np.array([1.0]),
                    class_id=np.array([0]),
                )

                # Draw bounding boxes and labels.
                # NOTE(review): cv2.putText's Hershey fonts cannot draw the Arabic part
                # of the Tunisian label (unsupported symbols are rendered as question
                # marks); drawing with PIL and a TrueType font would be needed for that.
                annotated_frame = box_annotator.annotate(
                    scene=annotated_frame,
                    detections=license_plate_detections,
                )
                cv2.putText(
                    annotated_frame,
                    full_label,
                    (plate_left, plate_top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    (0, 255, 0),  # Green for text
                    2,
                    cv2.LINE_AA,
                )

        # Write annotated frame to output video
        sink.write_frame(annotated_frame)


0: 384x640 4 cars, 2 trucks, 201.9ms
Speed: 6.1ms preprocess, 201.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 trucks, 190.3ms
Speed: 10.5ms preprocess, 190.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 trucks, 181.2ms
Speed: 6.6ms preprocess, 181.2ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 2 trucks, 163.3ms
Speed: 8.7ms preprocess, 163.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 2 trucks, 170.3ms
Speed: 4.6ms preprocess, 170.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 2 trucks, 183.4ms
Speed: 6.0ms preprocess, 183.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 trucks, 176.6ms
Speed: 4.6ms preprocess, 176.6ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 trucks, 259.0ms
Speed: 6.