In [5]:
import cv2
import numpy as np
import torch
from transformers import AutoImageProcessor
from transformers import AutoModelForObjectDetection
from transformers import pipeline
from PIL import Image, ImageDraw

In [14]:
url = "./detr_licence/4.png"
image = Image.open(url).convert('RGB')

image_processor = AutoImageProcessor.from_pretrained("./detr-resnet-50_finetuned_licence/checkpoint-400")
model = AutoModelForObjectDetection.from_pretrained("./detr-resnet-50_finetuned_licence/checkpoint-400")
obj_detector = pipeline(task="object-detection", model="./detr_new_data")
result = obj_detector(image)

In [15]:
result

[{'score': 0.9740789532661438,
  'label': 'license_plate',
  'box': {'xmin': 88, 'ymin': 164, 'xmax': 205, 'ymax': 190}}]

In [17]:

yellow = (0, 255, 255) # in BGR 
font = cv2.FONT_HERSHEY_SIMPLEX
stroke = 2

# Path to the video file
video_path = 'TEST.mp4'

# Open the video
cap = cv2.VideoCapture(video_path)

# Check if video opened successfully
if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

# Read and display frames in a loop
while True:
    (ret, frame) = cap.read()  # Read a frame
    if not ret:
        print("Reached end of video or failed to read, exiting...")
        break  # Exit the loop if there are no frames to read

    # convert the image from NumPy array into a PIL image
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(img)

    inputs = image_processor(images = image, return_tensors = "pt")
    outputs = model(**inputs)

    target_sizes = torch.tensor([image.size[::-1]])
    results = image_processor.post_process_object_detection(
        outputs, 
        target_sizes = target_sizes, 
        threshold = 0.25)[0]
    print(results)
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(i, 2) for i in box.tolist()]
        print(
            f"Detected {model.config.id2label[label.item()]} with confidence "
            f"{round(score.item(), 3)} at location {box}"        
        )

        # draw the bounding box
        cv2.rectangle(frame, 
                      (int(box[0]), int(box[1])),   # x1, y1
                      (int(box[2]), int(box[3])),   # x2, y2
                      yellow, 
                      stroke)
        
        # display the label
        cv2.putText(frame, 
                    model.config.id2label[label.item()], # label
                    (int(box[0]), int(box[1]-10)),       # x1, y1
                    font, 
                    1, 
                    yellow, 
                    stroke, 
                    cv2.LINE_AA)

    cv2.imshow('Video Playback', frame)  # Display the frame

    # Wait for 25 ms before moving on to the next frame
    # This will play the video at about 40 frames per second
    # Adjust the wait time as per your video's fps (use int(1000/fps))
    if cv2.waitKey(25) & 0xFF == ord('q'):
        print("Video playback interrupted by user.")
        break  # Exit the loop if 'q' is pressed
    key = cv2.waitKey(1) & 0xFF 
    if key == ord("q"):    # Press q to break out of the loop
        break

# Release the video capture object and close all OpenCV windows
cap.release()
cv2.waitKey(1)
cv2.destroyAllWindows()
cv2.waitKey(1)

{'scores': tensor([0.3302], grad_fn=<IndexBackward0>), 'labels': tensor([1]), 'boxes': tensor([[2347.4832,  534.6648, 2556.5603,  857.0939]],
       grad_fn=<IndexBackward0>)}
Detected license with confidence 0.33 at location [2347.48, 534.66, 2556.56, 857.09]
{'scores': tensor([0.3212], grad_fn=<IndexBackward0>), 'labels': tensor([1]), 'boxes': tensor([[2368.7734,  538.8475, 2556.7698,  863.0591]],
       grad_fn=<IndexBackward0>)}
Detected license with confidence 0.321 at location [2368.77, 538.85, 2556.77, 863.06]
{'scores': tensor([0.3304], grad_fn=<IndexBackward0>), 'labels': tensor([1]), 'boxes': tensor([[2383.7166,  548.7296, 2557.0005,  882.5303]],
       grad_fn=<IndexBackward0>)}
Detected license with confidence 0.33 at location [2383.72, 548.73, 2557.0, 882.53]
{'scores': tensor([0.3135, 0.3069], grad_fn=<IndexBackward0>), 'labels': tensor([1, 1]), 'boxes': tensor([[2398.6152,  593.3559, 2543.7952,  820.2440],
        [2420.1313,  580.2976, 2557.9128,  897.8497]],
       gra