In [3]:
import cv2
import torch
import math
import numpy as np
import pandas as pd
from typing import Tuple, Optional, List
from ultralytics import YOLO

class SyringeVolumeCalculator:
    def __init__(self) -> None:
        self.model = YOLO("runs/pose/train-pose11x-v20/weights/best.pt").eval()
        # Choose device: cuda > mps > cpu
        if torch.cuda.is_available():
            self.device: str = "cuda"
        elif torch.backends.mps.is_available():
            self.device = "mps"
        else:
            self.device = "cpu"
        self.model.to(self.device)
        self.syringe_diameter: float = 1.0  # cm

    @staticmethod
    def calculate_line_equation(p1: np.ndarray, p2: np.ndarray) -> Tuple[float, float, float]:
        x1, y1 = p1
        x2, y2 = p2
        A: float = y2 - y1
        B: float = x1 - x2
        C: float = (x2 - x1) * y1 - (y2 - y1) * x1
        return (A, B, C)

    def calculate_parallel_distance(
        self,
        line1: Tuple[float, float, float],
        line2: Tuple[float, float, float]
    ) -> Optional[float]:
        A1, B1, C1 = line1
        A2, B2, C2 = line2
        if abs(A1 * B2 - A2 * B1) > 1e-6:
            return None
        return abs(C2 - C1) / math.hypot(A1, B1)

    @staticmethod
    def point_to_line_distance(point: np.ndarray, line: Tuple[float, float, float]) -> float:
        x, y = point
        A, B, C = line
        return abs(A * x + B * y + C) / math.hypot(A, B)

    def calculate_height_for_detection(
        self,
        ll_point: np.ndarray,
        ul_point: np.ndarray,
        ur_point: np.ndarray,
        lr_point: np.ndarray,
        scale_factor: float
    ) -> float:
        lower_line_eq: Tuple[float, float, float] = self.calculate_line_equation(ll_point, lr_point)
        upper_line_eq: Tuple[float, float, float] = self.calculate_line_equation(ul_point, ur_point)
        h_pixels: Optional[float] = self.calculate_parallel_distance(lower_line_eq, upper_line_eq)
        d_ll: float = self.point_to_line_distance(ll_point, upper_line_eq)
        d_lr: float = self.point_to_line_distance(lr_point, upper_line_eq)
        avg_point_dist: float = (d_ll + d_lr) / 2
        if h_pixels is None or abs(h_pixels - avg_point_dist) > 5:
            return avg_point_dist * scale_factor
        return ((h_pixels * 0.7) + (avg_point_dist * 0.3)) * scale_factor

    def process_frame(self, frame: np.ndarray) -> Tuple[np.ndarray, List[float]]:
        # Run inference and get an annotated frame (with on-screen pose dots).
        results = self.model.predict(frame, verbose=False, conf=0.6)
        if not results:
            return frame, []
        result = results[0]
        annotated_frame: np.ndarray = result.plot()  # Draws the default keypoints and confidence dots.
        volumes: List[float] = []
        try:
            # Filter out any missing keypoints.
            keypoints_list = [kp for kp in result.keypoints.xy if kp is not None]
            # Get confidence values if available.
            if hasattr(result.keypoints, "conf") and result.keypoints.conf is not None:
                conf_numpy = result.keypoints.conf.cpu().numpy()
                confidences = list(conf_numpy)
            else:
                confidences = [None] * len(keypoints_list)
        except Exception as e:
            print(f"Error processing keypoints: {e}")
            return annotated_frame, volumes

        for i, kp in enumerate(keypoints_list):
            if kp is None:
                continue
            try:
                # Convert to numpy array if necessary.
                kpts: np.ndarray = kp.cpu().numpy() if hasattr(kp, "cpu") else kp
            except Exception as e:
                print(f"Error converting keypoints to numpy: {e}")
                continue

            if kpts.shape[0] < 4:
                continue

            # Retrieve confidence and convert it to a float if needed.
            conf = confidences[i] if i < len(confidences) else None
            ul_point: np.ndarray = kpts[1]
            if conf is not None:
                try:
                    conf_val = conf.item() if hasattr(conf, "item") else float(conf)
                    cv2.putText(annotated_frame, f"Conf: {conf_val:.2f}",
                                (int(ul_point[0]), int(ul_point[1]) - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
                except Exception as e:
                    print(f"Error formatting confidence: {e}")

            lower_edge_px: float = np.linalg.norm(kpts[3] - kpts[0])
            upper_edge_px: float = np.linalg.norm(kpts[2] - kpts[1])
            if lower_edge_px <= 0 or upper_edge_px <= 0:
                continue
            scale_factor: float = 1.0 / ((lower_edge_px + upper_edge_px) / 2.0)
            height_cm: float = self.calculate_height_for_detection(kpts[0], kpts[1], kpts[2], kpts[3], scale_factor)
            if height_cm <= 0 or height_cm > 30:
                continue
            volume: float = math.pi * (self.syringe_diameter / 2) ** 2 * height_cm
            volumes.append(volume)
            # Overlay volume and height text.
            cv2.putText(annotated_frame, f"Volume {i+1}: {volume:.2f} mL", (10, 30 + i * 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(annotated_frame, f"Height {i+1}: {height_cm:.1f} cm", (10, 70 + i * 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        return annotated_frame, volumes

    def run_on_video(
        self,
        input_video_path: str,
        output_video_path: str,
        output_csv_path: str
    ) -> None:
        cap = cv2.VideoCapture(input_video_path)
        if not cap.isOpened():
            raise IOError(f"Cannot open video file: {input_video_path}")
        width: int = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height: int = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps: float = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            fps = 30
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
        frame_count: int = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        current_frame: int = 0
        csv_rows: List[dict] = []

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            processed_frame, volumes = self.process_frame(frame)
            out.write(processed_frame)
            row: dict = {"frame": current_frame}
            for idx, vol in enumerate(volumes):
                row[f"volume_estimation{idx+1}"] = vol
            csv_rows.append(row)
            current_frame += 1
            print(f"Processing frame {current_frame}/{frame_count}", end="\r")
        cap.release()
        out.release()
        cv2.destroyAllWindows()
        print("\nExport complete! 🎉")
        df = pd.DataFrame(csv_rows)
        df.to_csv(output_csv_path, index=False)
        print(f"CSV data exported to {output_csv_path}.")

# Set your file paths accordingly.
input_video_path: str = "../videos/input_videos/IMG_4594.MOV"     # Replace with the input video file path.
output_video_path: str = "../videos/output_videos/oxoxxx.mp4"   # Replace with the desired output video file path.
output_csv_path: str = "volume_estimations.csv"  # Replace with the desired CSV file path.

# Create an instance and run the video processing.
calculator = SyringeVolumeCalculator()
calculator.run_on_video(input_video_path, output_video_path, output_csv_path)

Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python scalar
Error formatting confidence: can only convert an array of size 1 to a Python

../videos/input_videos/IMG_4593.MOV