In [61]:
import torch
import cv2
import numpy as np
from ultralytics import YOLO
import os

In [94]:
# Path to your video frames and label files
frames_dir = 'D:\Datasets\DAta_intern\V9T4\images'
labels_dir = 'D:\Datasets\DAta_intern\V9T4\labels'
model_path = "D:\VS Code Folders\Squash-Ball-Tracking\Squash-Ball-Detection\\results_after_75_epochs\weights\\best.pt"


In [95]:
# Output video settings
output_video_path = 'output_video.mp4'
output_fps = 30 

In [96]:
model = YOLO(model_path)

In [97]:
# Function to draw ground truth and predictions on each frame
def draw_detections(image, ground_truths):
    # Draw ground truth (green dot)
    for gt in ground_truths:
        x, y = int(gt[1]), int(gt[2])
        cv2.drawMarker(image, (x, y), color=[0, 255, 0], thickness=1, 
        markerType= cv2.MARKER_CROSS, line_type=cv2.LINE_AA,
        markerSize=10)
        overlay = image.copy()
        cv2.circle(overlay, (x, y), 10, (0, 0, 255), -1)  # Filled red circle
        cv2.addWeighted(overlay, 0.3, image, 0.7, 0, image)  # Blend the overlay with less intensity
    
    return image

In [98]:
# Function to read annotations from label file
def read_annotations(label_file):
    with open(label_file, 'r') as file:
        annotations = []
        for line in file.readlines():
            parts = line.strip().split()
            class_id = int(parts[0])
            center_x = float(parts[1])
            center_y = float(parts[2])
            width = float(parts[3])
            height = float(parts[4])
            annotations.append((class_id, center_x, center_y, width, height))
        return annotations

In [99]:
# Function to convert normalized annotations to pixel values
def convert_annotations_to_pixels(annotations, image_width, image_height):
    pixel_annotations = []
    for annotation in annotations:
        class_id, center_x, center_y, width, height = annotation
        center_x = int(center_x * image_width)
        center_y = int(center_y * image_height)
        pixel_annotations.append((class_id, center_x, center_y, width, height))
    return pixel_annotations

In [100]:
# Function to check if prediction is inside the red circle
def is_inside_circle(pred_x, pred_y, gt_x, gt_y, radius=10):
    distance_squared = np.sqrt((pred_x - gt_x)**2 + (pred_y - gt_y)**2)
    return distance_squared <= radius

In [101]:
frame_paths = []
for frame in os.listdir(frames_dir):
    if frame.endswith('.jpg'):
        frame_paths.append(os.path.join(frames_dir, frame))

In [102]:
# Initialize video writer
frame = cv2.imread(frame_paths[0])
height, width, _ = frame.shape
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(output_video_path, fourcc, output_fps, (width, height))
print(height, width)

640 640


In [103]:
TP = 0
FP = 0
FN = 0
F1 = 0
# Process each frame
for frame_path in frame_paths:
    # Read frame
    frame = cv2.imread(frame_path)
    
    # Read corresponding label file
    frame_name = os.path.splitext(os.path.basename(frame_path))[0]
    label_file = os.path.join(labels_dir, frame_name) + '.txt'
    
    ground_truths = read_annotations(label_file)
    ground_truths = convert_annotations_to_pixels(ground_truths, width, height)
    
    # Make predictions
    predictions = model.predict(frame)
    
    # Extracting bounding box center coordinates for predictions
    prediction_centers = []
    prediction_confidences = []
    for pred in predictions[0].boxes:
        if pred.conf > 0.1:  # You can set a threshold for confidence
            bbox = pred.xyxy[0]  # xyxy format
            x_center = (bbox[0] + bbox[2]) / 2
            y_center = (bbox[1] + bbox[3]) / 2
            prediction_centers.append([x_center.item(), y_center.item()])
            prediction_confidences.append(pred.conf)

    # Draw ground truth and predictions on the frame
    annotated_frame = draw_detections(frame, ground_truths)
    cv2.putText(annotated_frame, f'Ground Truth: {ground_truths}', (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
    cv2.putText(annotated_frame, f'Predictions: {prediction_centers}', (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

    # Track which ground truths have been matched
    matched_ground_truths = set()
    
    # Check true positives and false positives
    for j, pred in enumerate(prediction_centers):
        pred_x, pred_y = pred[0], pred[1]
        matched = False
        for i, gt in enumerate(ground_truths):
            gt_x, gt_y = gt[1], gt[2]
            
            print("g_x: ", gt_x)
            print('gt_y: ', gt_y)
            print('pred_x: ', pred_x)
            print('pred_y: ', pred_y)

            if is_inside_circle(pred_x, pred_y, gt_x, gt_y, radius=10):
                # True positive (prediction inside the red circle)
                cv2.putText(annotated_frame, 'True Positive', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                matched_ground_truths.add(i)
                matched = True
                TP += prediction_confidences[j]
                break
        if not matched:
            # False positive (prediction outside the red circle)
            cv2.putText(annotated_frame, 'False Positive', (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
            FP += prediction_confidences[j]

    # Check for false negatives (ground truths with no matching predictions)
    for i, gt in enumerate(ground_truths):
        if i not in matched_ground_truths:
            gt_x, gt_y = gt[1], gt[2]
            cv2.putText(annotated_frame, 'False Negative', (10,60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            FN += 1

    # Write frame to the output video
    out.write(annotated_frame)
    
    # Display the frame (optional)
    cv2.imshow('Frame', annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
out.release()
cv2.destroyAllWindows()



0: 640x640 2 balls, 394.5ms
Speed: 12.8ms preprocess, 394.5ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 640)
g_x:  291
gt_y:  239
pred_x:  290.26092529296875
pred_y:  239.04818725585938
g_x:  291
gt_y:  239
pred_x:  290.2835388183594
pred_y:  238.92971801757812

0: 640x640 2 balls, 778.5ms
Speed: 18.2ms preprocess, 778.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
g_x:  282
gt_y:  249
pred_x:  282.443603515625
pred_y:  250.13539123535156
g_x:  282
gt_y:  249
pred_x:  282.7064208984375
pred_y:  249.75048828125

0: 640x640 (no detections), 465.6ms
Speed: 16.0ms preprocess, 465.6ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 343.2ms
Speed: 8.0ms preprocess, 343.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 310.2ms
Speed: 9.8ms preprocess, 310.2ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 388.1ms


In [93]:
F1 = (2 * TP)/(2 * TP + FP + FN)
print(f'True Positive: {TP}')
print(f'False Positive: {FP}')
print(f'False Negative: {FN}')
print(f'F1 Score: {F1}')

True Positive: tensor([176.4134])
False Positive: tensor([1.5118])
False Negative: 309
F1 Score: tensor([0.5319])
