In [3]:
import torch
import cv2
import numpy as np
from ultralytics import YOLO
import os

In [20]:
# Paths to the video frames, their label files, and the trained model weights.
# Raw strings keep every Windows backslash literal, so sequences such as "\D",
# "\V" or "\w" are no longer parsed as (invalid) escape sequences and no
# backslash has to be doubled by hand.
frames_dir = r'D:\Datasets\DAta_intern\V8T3\images'
labels_dir = r'D:\Datasets\DAta_intern\V8T3\labels'
model_path = r"D:\VS Code Folders\Squash-Ball_2\Results\results_after_75_epochs\weights\best.pt"

In [5]:
# Settings for the rendered video: frame rate (frames per second) and the
# file the annotated frames are written to.
output_fps = 30
output_video_path = 'output_video.avi'

In [21]:
# Build the YOLO model from the weights file referenced by model_path.
model = YOLO(model_path)

In [57]:
# Function to draw the ground-truth markers on a frame
def draw_detections(image, ground_truths, predictions):
    """Mark every ground-truth centre on ``image`` and return the image.

    For each ground truth a thin anti-aliased cross is drawn at its centre,
    then a filled disc of radius 10 is blended over the same spot at 30%
    opacity. Colours are given in the channel order OpenCV uses for images
    loaded with ``cv2.imread`` (BGR), i.e. a green cross and a red disc.

    Args:
        image: Image array to annotate. It is modified in place.
        ground_truths: Iterable of annotations whose items at index 1 and 2
            are the x and y pixel coordinates of the centre.
        predictions: Accepted for interface compatibility; predictions are
            currently not drawn.

    Returns:
        The same ``image`` object, annotated.
    """
    cross_colour = (0, 255, 0)
    disc_colour = (0, 0, 255)

    for annotation in ground_truths:
        centre = (int(annotation[1]), int(annotation[2]))

        # Cross marker exactly on the annotated centre.
        cv2.drawMarker(
            image,
            centre,
            color=cross_colour,
            markerType=cv2.MARKER_CROSS,
            markerSize=10,
            thickness=1,
            line_type=cv2.LINE_AA,
        )

        # Paint the disc on a copy and blend it back so the frame content
        # underneath stays visible.
        blended = image.copy()
        cv2.circle(blended, centre, 10, disc_colour, -1)
        cv2.addWeighted(blended, 0.3, image, 0.7, 0, image)

    return image

In [10]:
# Function to read annotations from label file
def read_annotations(label_file):
    """Parse a label file into a list of annotation tuples.

    Each non-blank line is expected to hold five whitespace-separated
    fields: ``class_id center_x center_y width height``. Blank or
    whitespace-only lines (for example a trailing newline at the end of the
    file) are skipped instead of raising ``IndexError``.

    Args:
        label_file: Path of the label file to read.

    Returns:
        A list of ``(class_id, center_x, center_y, width, height)`` tuples,
        with ``class_id`` as ``int`` and the other fields as ``float``. The
        list is empty when the file contains no annotations.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line has fewer than five fields.
        ValueError: If a field cannot be converted to a number.
    """
    annotations = []
    with open(label_file, 'r') as file:
        # Iterate lazily; there is no need to load all lines up front.
        for line in file:
            parts = line.split()
            if not parts:
                # Nothing to parse on an empty line.
                continue
            class_id = int(parts[0])
            center_x = float(parts[1])
            center_y = float(parts[2])
            width = float(parts[3])
            height = float(parts[4])
            annotations.append((class_id, center_x, center_y, width, height))
    return annotations

In [11]:
# Function to convert normalized annotation centres to pixel values
def convert_annotations_to_pixels(annotations, image_width, image_height):
    """Scale the centre of every annotation to pixel coordinates.

    Only the centre is converted: ``center_x`` is multiplied by
    ``image_width`` and ``center_y`` by ``image_height``, both truncated to
    ``int``. ``width`` and ``height`` are passed through unchanged.

    Args:
        annotations: Iterable of
            ``(class_id, center_x, center_y, width, height)`` tuples.
        image_width: Width used to scale ``center_x``.
        image_height: Height used to scale ``center_y``.

    Returns:
        A new list of ``(class_id, center_x, center_y, width, height)``
        tuples with the centre expressed as integers.
    """
    return [
        (label, int(cx * image_width), int(cy * image_height), box_w, box_h)
        for label, cx, cy, box_w, box_h in annotations
    ]

In [22]:
# Function to check whether a prediction lies within a circle around a ground truth
def is_inside_circle(pred_x, pred_y, gt_x, gt_y, radius=10):
    """Return whether ``(pred_x, pred_y)`` is within ``radius`` of ``(gt_x, gt_y)``.

    Squared distances are compared, so no square root is taken. A point
    lying exactly on the circle counts as inside.
    """
    offset_x = pred_x - gt_x
    offset_y = pred_y - gt_y
    return offset_x ** 2 + offset_y ** 2 <= radius ** 2

In [18]:
# Collect the full path of every .jpg frame in frames_dir.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the frames reach the video writer in a stable, name-based sequence.
frame_paths = sorted(
    os.path.join(frames_dir, frame)
    for frame in os.listdir(frames_dir)
    if frame.endswith('.jpg')
)

['D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100000_jpg.rf.b33e7348a3cfaedf538ba61465ad1400.jpg',
 'D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100001_jpg.rf.33caa5ac4e8c6330c6ff49d95d75950c.jpg',
 'D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100002_jpg.rf.b6c65b59a6eae2d732c8344f5df0b8bd.jpg',
 'D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100003_jpg.rf.49fec51f27a12f41eb6fbf7410fd8af7.jpg',
 'D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100004_jpg.rf.743e78067f6cc38145b34b2c36429506.jpg',
 'D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100005_jpg.rf.41ff6726969652a8e9a851b0a06d030a.jpg',
 'D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100006_jpg.rf.de7acfcdb852bfc4abfa49eeec392b1b.jpg',
 'D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100007_jpg.rf.769c3d529ae6eb46b6024aa184b9653d.jpg',
 'D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100008_jpg.rf.d7ae7b43a3ca6e5a990a4a76191226e9.jpg',
 'D:\\Datasets\\DAta_intern\\V8T3\\images\\frame_100009_jpg.rf.bdc2cfd6475ca0c6f6d

In [31]:
# Initialize video writer
# The output frame size is taken from the first frame on disk.
if not frame_paths:
    raise FileNotFoundError(f"No .jpg frames found in {frames_dir!r}")

frame = cv2.imread(frame_paths[0])
if frame is None:
    # cv2.imread reports an unreadable file by returning None, not by raising.
    raise IOError(f"Could not read frame {frame_paths[0]!r}")

# Only the first two axes (rows, columns) are needed for the frame size.
height, width = frame.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(output_video_path, fourcc, output_fps, (width, height))
if not out.isOpened():
    # Fail early instead of silently writing nothing later on.
    raise IOError(f"Could not open video writer for {output_video_path!r}")
print(height, width)

640 640


In [64]:
# Process each frame: run the model, annotate the frame, append it to the video.
CONF_THRESHOLD = 0.1  # minimum confidence for a detection to be kept
MATCH_RADIUS = 10     # pixels; same radius as the disc drawn by draw_detections

try:
    for frame_path in frame_paths:
        # Read frame
        frame = cv2.imread(frame_path)
        if frame is None:
            # cv2.imread returns None for files it cannot decode.
            print(f"Skipping unreadable frame: {frame_path}")
            continue
        frame_height, frame_width = frame.shape[:2]

        # Read corresponding label file (same base name, .txt extension)
        frame_name = os.path.splitext(os.path.basename(frame_path))[0]
        label_file = os.path.join(labels_dir, frame_name + '.txt')

        # Scale the normalized centres by this frame's own size.
        ground_truths = read_annotations(label_file)
        ground_truths = convert_annotations_to_pixels(ground_truths, frame_width, frame_height)

        # Make predictions
        predictions = model.predict(frame)

        # Extract bounding box center coordinates for confident predictions
        prediction_centers = []
        for pred in predictions[0].boxes:
            if float(pred.conf) > CONF_THRESHOLD:
                bbox = pred.xyxy[0]  # xyxy format
                x_center = float(bbox[0] + bbox[2]) / 2
                y_center = float(bbox[1] + bbox[3]) / 2
                prediction_centers.append([x_center, y_center])

        # Draw ground truth on the frame
        annotated_frame = draw_detections(frame, ground_truths, prediction_centers)

        # Classify each prediction once: it is a true positive when it falls
        # inside the circle of at least one ground truth, else a false positive.
        for index, pred in enumerate(prediction_centers):
            pred_x, pred_y = int(pred[0]), int(pred[1])
            # Ground-truth tuples are (class_id, center_x, center_y, width,
            # height), so the centre lives at indices 1 and 2.
            is_true_positive = any(
                is_inside_circle(pred_x, pred_y, int(gt[1]), int(gt[2]), radius=MATCH_RADIUS)
                for gt in ground_truths
            )
            label = 'True Positive' if is_true_positive else 'False Positive'
            # One text row per prediction so labels do not overprint each other.
            cv2.putText(annotated_frame, label, (450, 100 + 20 * index),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

        # Write frame to the output video
        out.write(annotated_frame)

        # Display the frame (optional); press "q" to stop early
        cv2.imshow('Frame', annotated_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even when a frame fails, so the video file is closed
    # properly and no preview window is left open.
    out.release()
    cv2.destroyAllWindows()


0: 640x640 (no detections), 437.8ms
Speed: 66.0ms preprocess, 437.8ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 698.9ms
Speed: 36.0ms preprocess, 698.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 373.2ms
Speed: 12.0ms preprocess, 373.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 414.5ms
Speed: 7.0ms preprocess, 414.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 827.6ms
Speed: 9.0ms preprocess, 827.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 536.4ms
Speed: 14.0ms preprocess, 536.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 353.6ms
Speed: 10.0ms preprocess, 353.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 351.5ms
Speed: 11.0ms

In [62]:
# Number of prediction centres kept for the last frame processed by the loop.
len(prediction_centers)

1

In [49]:
# Run the model on a single frame to inspect the structure of its results.
# A raw string keeps the Windows backslashes literal (no invalid escapes and
# no hand-doubled "\\f").
results = model.predict(r'D:\Datasets\DAta_intern\V8T3\images\frame_100283_jpg.rf.47d7eac528b605db604a8f6202517ba8.jpg')


image 1/1 D:\Datasets\DAta_intern\V8T3\images\frame_100283_jpg.rf.47d7eac528b605db604a8f6202517ba8.jpg: 640x640 1 ball, 340.9ms
Speed: 8.1ms preprocess, 340.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)


In [50]:
# Show the full list returned by model.predict (one Results object per image).
results

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'ball'}
 obb: None
 orig_img: array([[[  2,   2,   2],
         [  0,   0,   0],
         [  1,   1,   1],
         ...,
         [  0,   0,   4],
         [  0,   0,   4],
         [  0,   1,   5]],
 
        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ...,
         [  0,   0,   3],
         [  0,   0,   4],
         [  0,   0,   3]],
 
        [[166, 166, 166],
         [161, 161, 161],
         [159, 159, 159],
         ...,
         [149, 149, 155],
         [150, 149, 158],
         [150, 150, 156]],
 
        ...,
 
        [[116, 116, 116],
         [110, 112, 112],
         [105, 110, 109],
         ...,
         [133, 148, 157],
         [129, 144, 153],
         [121, 136, 145]],
 
        [[  7,   5,   4],
         [  5,   3,   2],
         [  1,   2,   0],
         ...,
         [  0,   0, 

In [47]:
# Show the Results object for the single image that was predicted.
results[0]

ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'ball'}
obb: None
orig_img: array([[[  2,   2,   2],
        [  0,   0,   0],
        [  1,   1,   1],
        ...,
        [  2,   1,   5],
        [  2,   1,   5],
        [  3,   2,   6]],

       [[  0,   0,   0],
        [  0,   0,   0],
        [  0,   0,   0],
        ...,
        [  0,   0,   3],
        [  0,   0,   4],
        [  0,   0,   3]],

       [[166, 166, 166],
        [161, 161, 161],
        [159, 159, 159],
        ...,
        [152, 150, 156],
        [152, 149, 158],
        [153, 151, 157]],

       ...,

       [[109, 117, 117],
        [105, 113, 113],
        [101, 109, 109],
        ...,
        [141, 153, 165],
        [137, 149, 161],
        [129, 141, 153]],

       [[  0,   5,   4],
        [  0,   4,   3],
        [  0,   3,   2],
        ...,
        [  0,   0,   8],
        [  0,   0,   6],
        [  0,  

In [51]:
# Show the detected boxes (class, confidence and coordinates in several formats).
results[0].boxes

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0.])
conf: tensor([0.4162])
data: tensor([[399.0138, 117.2492, 405.4528, 127.3184,   0.4162,   0.0000]])
id: None
is_track: False
orig_shape: (640, 640)
shape: torch.Size([1, 6])
xywh: tensor([[402.2333, 122.2838,   6.4390,  10.0692]])
xywhn: tensor([[0.6285, 0.1911, 0.0101, 0.0157]])
xyxy: tensor([[399.0138, 117.2492, 405.4528, 127.3184]])
xyxyn: tensor([[0.6235, 0.1832, 0.6335, 0.1989]])

In [54]:
# Print the xyxy corner coordinates of the first detected box.
print(results[0].boxes[0].xyxy[0])

tensor([399.0138, 117.2492, 405.4528, 127.3184])
