## Webcam Inference

In [None]:
import cv2
import torch
import math
import numpy as np
from typing import Tuple, Optional
from ultralytics import YOLO

class SyringeVolumeCalculator:
    """Estimate syringe volumes from YOLO pose keypoints on a live webcam feed.

    Each detection supplies four keypoints (lower-left, upper-left,
    upper-right, lower-right).  The mean pixel length of the lower and upper
    edges is treated as the barrel diameter in pixels; for every candidate
    physical diameter this yields a pixel-to-cm scale, which converts the
    pixel distance between the two edges into a height and then into a
    cylinder volume.
    """

    def __init__(self) -> None:
        """Load the pose model, choose a compute device and reset keypoint state."""
        # Initialize YOLO model and set device
        self.model = YOLO("runs/pose/train-pose11x-v23-P50/weights/best.pt").eval()
        # Prefer CUDA, then Apple MPS, and fall back to the CPU.
        if torch.cuda.is_available():
            self.device = "cuda"
        elif torch.backends.mps.is_available():
            self.device = "mps"
        else:
            self.device = "cpu"
        self.model.to(self.device)
        # List of possible diameters in cm
        self.possible_diameters = [0.45, 1.0, 1.25, 2.0]
        # Keypoint variables (only read by draw_debug_info)
        self.ll_point: Optional[np.ndarray] = None
        self.ul_point: Optional[np.ndarray] = None
        self.ur_point: Optional[np.ndarray] = None
        self.lr_point: Optional[np.ndarray] = None

    def initialize_camera(self) -> "cv2.VideoCapture":
        """Open camera 0 and request a 3840x2160 capture resolution.

        Returns:
            The opened ``cv2.VideoCapture``.

        Raises:
            IOError: If the camera could not be opened.
        """
        cap = cv2.VideoCapture(0)
        # Check the device first: configuring or querying an unopened capture
        # only produces a misleading "0.0 x 0.0" resolution line.
        if not cap.isOpened():
            cap.release()
            raise IOError("Camera initialization failed")
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)
        # Report what the driver actually reports back after the request.
        print(f"Camera resolution: {cap.get(cv2.CAP_PROP_FRAME_WIDTH)} x {cap.get(cv2.CAP_PROP_FRAME_HEIGHT)}")
        return cap

    @staticmethod
    def calculate_line_equation(p1: np.ndarray, p2: np.ndarray) -> Tuple[float, float, float]:
        """Return (A, B, C) of the line ``A*x + B*y + C = 0`` through p1 and p2.

        The coefficients are not normalised: ``hypot(A, B)`` equals the
        distance between the two points, and swapping the points flips the
        sign of all three coefficients.
        """
        x1, y1 = p1
        x2, y2 = p2
        A = y2 - y1
        B = x1 - x2
        C = (x2 - x1) * y1 - (y2 - y1) * x1
        return (A, B, C)

    def calculate_parallel_distance(self, line1: Tuple[float, float, float],
                                    line2: Tuple[float, float, float],
                                    tol: float = 1e-6) -> Optional[float]:
        """Return the distance between two parallel lines, or None.

        Args:
            line1: (A, B, C) coefficients of the first line.
            line2: (A, B, C) coefficients of the second line.
            tol: Largest accepted ``|sin(angle)|`` between the two lines for
                them to count as parallel.

        Returns:
            The perpendicular distance, or None when the lines are not
            parallel or either line is degenerate (A == B == 0).
        """
        A1, B1, C1 = line1
        A2, B2, C2 = line2
        norm1 = math.hypot(A1, B1)
        norm2 = math.hypot(A2, B2)
        if norm1 == 0 or norm2 == 0:  # Degenerate line, no direction
            return None
        # Normalised cross product == |sin(angle)|, independent of how long
        # the segments that defined the lines were.
        if abs(A1 * B2 - A2 * B1) / (norm1 * norm2) > tol:  # Not parallel
            return None
        # The two triples generally have different scales (and maybe opposite
        # orientation), so each offset must be normalised by its own norm
        # before the offsets can be compared.
        offset1 = C1 / norm1
        offset2 = C2 / norm2
        if A1 * A2 + B1 * B2 < 0:  # Normals point in opposite directions
            offset2 = -offset2
        return abs(offset2 - offset1)

    @staticmethod
    def point_to_line_distance(point: np.ndarray, line: Tuple[float, float, float]) -> float:
        """Return the perpendicular distance from ``point`` to ``line``.

        ``line`` is an (A, B, C) triple for ``A*x + B*y + C = 0``; A and B
        must not both be zero, otherwise the division below is by zero.
        """
        x, y = point
        A, B, C = line
        return abs(A * x + B * y + C) / math.hypot(A, B)

    def draw_debug_info(self, frame: np.ndarray) -> None:
        """Draw keypoint labels and edge lengths for the stored keypoints.

        Reads ``self.ll_point`` / ``ul_point`` / ``ur_point`` / ``lr_point``
        and skips anything that is still None.  Draws onto ``frame`` in place.
        """
        debug_params = {
            'fontFace': cv2.FONT_HERSHEY_SIMPLEX,
            'fontScale': 0.6,
            'color': (0, 0, 255),
            'thickness': 1
        }
        labelled_points = (
            ('LL', self.ll_point),
            ('UL', self.ul_point),
            ('UR', self.ur_point),
            ('LR', self.lr_point),
        )
        for label, pt in labelled_points:
            if pt is not None:
                # Offset the label 10 px to the right of the keypoint.
                cv2.putText(frame, label, (int(pt[0]) + 10, int(pt[1])), **debug_params)
        if self.ll_point is not None and self.lr_point is not None:
            lower_len = np.linalg.norm(self.lr_point - self.ll_point)
            cv2.putText(frame, f"Lower: {lower_len:.1f}px", (10, 100), **debug_params)
        if self.ul_point is not None and self.ur_point is not None:
            upper_len = np.linalg.norm(self.ur_point - self.ul_point)
            cv2.putText(frame, f"Upper: {upper_len:.1f}px", (10, 130), **debug_params)

    def draw_volume_table(self, frame: np.ndarray, volumes: list, table_x: int, table_y: int) -> None:
        """Draw a diameter/volume table onto ``frame`` in place.

        Args:
            frame: Image to draw on.
            volumes: ``(diameter, volume)`` pairs; a volume of None is shown
                as "N/A".
            table_x: Left edge of the table in pixels.
            table_y: Top edge of the table in pixels.
        """
        table_width = 250
        row_height = 30
        # One header row plus one row per entry (150 px for four diameters).
        table_height = row_height * (len(volumes) + 1)
        text_style = (cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
        value_x = table_x + 150
        # Draw light gray background
        cv2.rectangle(frame, (table_x, table_y), (table_x + table_width, table_y + table_height), (220, 220, 220), -1)
        # Draw headers
        cv2.putText(frame, "Diameter", (table_x + 10, table_y + 20), *text_style)
        cv2.putText(frame, "mL", (value_x, table_y + 20), *text_style)
        # Draw rows
        for i, (D, volume) in enumerate(volumes):
            y = table_y + 50 + i * row_height
            cv2.putText(frame, f"{D:.2f}", (table_x + 10, y), *text_style)
            volume_text = f"{volume:.2f}" if volume is not None else "N/A"
            cv2.putText(frame, volume_text, (value_x, y), *text_style)

    def _estimate_volumes(self, width_px: float, height_px: float) -> list:
        """Return ``(diameter, volume or None)`` for every candidate diameter.

        ``width_px`` is the barrel width in pixels, so ``diameter / width_px``
        is the cm-per-pixel scale under that diameter hypothesis.
        """
        volumes = []
        for D in self.possible_diameters:
            height_cm = height_px * (D / width_px)
            if 0 < height_cm <= 30:  # Ensure height is reasonable
                volume = math.pi * (D / 2) ** 2 * height_cm
            else:
                volume = None
            volumes.append((D, volume))
        return volumes

    def process_frame(self, frame: np.ndarray) -> np.ndarray:
        """Detect syringes in ``frame`` and draw a volume table beside each one.

        Returns:
            The annotated copy produced by ``result.plot()`` with the tables
            drawn on it, or ``frame`` itself when the model returns no results.
        """
        # Run YOLO model prediction on the frame
        results = self.model.predict(frame, verbose=False, conf=0.6)
        if not results:
            return frame

        result = results[0]
        annotated_frame = result.plot()  # Get the frame with detections plotted
        keypoints = result.keypoints

        # Iterate over all detected syringes
        for i, box in enumerate(result.boxes):
            # Only the right edge and the top of the box are needed to place the table.
            _, y1, x2, _ = map(int, box.xyxy[0].cpu().numpy())

            # Skip this syringe if keypoints are missing
            if keypoints is None or len(keypoints.xy) <= i or len(keypoints.xy[i]) < 4:
                continue

            # Extract keypoints for this syringe
            try:
                kpts = keypoints.xy[i].cpu().numpy()
                ll_point, ul_point, ur_point, lr_point = kpts[:4]  # Lower-left, upper-left, upper-right, lower-right
            except Exception as e:
                print(f"Error extracting keypoints for syringe {i}: {e}")
                continue

            # Calculate volumes based on keypoints.  Any failure is reported
            # and only this detection is dropped, so the video loop keeps running.
            try:
                # Calculate edge lengths in pixels
                lower_edge_px = np.linalg.norm(lr_point - ll_point)
                upper_edge_px = np.linalg.norm(ur_point - ul_point)
                if lower_edge_px <= 0 or upper_edge_px <= 0:
                    continue
                avg_edge_px = (lower_edge_px + upper_edge_px) / 2.0

                # Calculate line equations for upper and lower edges
                lower_line_eq = self.calculate_line_equation(ll_point, lr_point)
                upper_line_eq = self.calculate_line_equation(ul_point, ur_point)

                # Height in pixels: blend the line-to-line distance with the
                # mean corner-to-line distance when the edges are parallel and
                # both estimates agree within 5 px; otherwise use the latter.
                h_pixels = self.calculate_parallel_distance(lower_line_eq, upper_line_eq)
                d_ll = self.point_to_line_distance(ll_point, upper_line_eq)
                d_lr = self.point_to_line_distance(lr_point, upper_line_eq)
                avg_point_dist = (d_ll + d_lr) / 2
                if h_pixels is not None and abs(h_pixels - avg_point_dist) <= 5:
                    height_pixels = h_pixels * 0.7 + avg_point_dist * 0.3
                else:
                    height_pixels = avg_point_dist

                # Calculate volumes for each possible diameter
                volumes = self._estimate_volumes(avg_edge_px, height_pixels)

                # Table goes 10 px right of the box, aligned with its top.
                self.draw_volume_table(annotated_frame, volumes, x2 + 10, y1)

            except Exception as e:
                print(f"Error processing syringe {i}: {e}")
                continue

        return annotated_frame

    def run(self) -> None:
        """Read frames from the camera and display results until 'q' is pressed.

        The loop also ends when a frame cannot be read.  The camera and all
        OpenCV windows are released on exit, including on an exception.
        """
        cap = self.initialize_camera()
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                processed_frame = self.process_frame(frame)
                cv2.imshow('Syringe Volume Measurement', processed_frame)
                # Mask to the low byte before comparing with the key code.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()

if __name__ == "__main__":
    # Constructing the calculator loads the YOLO weights; run() then blocks
    # in the webcam loop until 'q' is pressed or a frame cannot be read.
    calculator = SyringeVolumeCalculator()
    calculator.run()

## With tracking

Use ByteTrack to track syringes across frames.  
For the highest possible FPS, run on a GPU or use the smaller "n" (nano) model variant.  


In [None]:
import cv2
import torch
import math
import numpy as np
from typing import Tuple, Optional
from ultralytics import YOLO

class SyringeVolumeCalculator:
    """Track syringes on a webcam feed and estimate their volumes.

    Detections come from a YOLO pose model run through the ByteTrack tracker,
    so each syringe carries a track ID.  Four keypoints per detection
    (lower-left, upper-left, upper-right, lower-right) give a width and a
    height in pixels; for every candidate physical diameter the width fixes a
    pixel-to-cm scale, which converts the height into a cylinder volume.
    """

    def __init__(self) -> None:
        """Load the pose model, choose a compute device and reset keypoint state."""
        # Initialize YOLO model and set device
        self.model = YOLO("runs/pose/train-pose11x-v23-P50/weights/best.pt").eval()
        # Prefer CUDA, then Apple MPS, and fall back to the CPU.
        if torch.cuda.is_available():
            self.device = "cuda"
        elif torch.backends.mps.is_available():
            self.device = "mps"
        else:
            self.device = "cpu"
        self.model.to(self.device)
        # List of possible diameters in cm
        self.possible_diameters = [0.45, 1.0, 1.25, 2.0]
        # Keypoints of the most recently processed syringe (read by draw_debug_info)
        self.ll_point: Optional[np.ndarray] = None
        self.ul_point: Optional[np.ndarray] = None
        self.ur_point: Optional[np.ndarray] = None
        self.lr_point: Optional[np.ndarray] = None

    def initialize_camera(self) -> "cv2.VideoCapture":
        """Open camera 0 and request a 3840x2160 capture resolution.

        Returns:
            The opened ``cv2.VideoCapture``.

        Raises:
            IOError: If the camera could not be opened.
        """
        cap = cv2.VideoCapture(0)
        # Check the device first: configuring or querying an unopened capture
        # only produces a misleading "0.0 x 0.0" resolution line.
        if not cap.isOpened():
            cap.release()
            raise IOError("Camera initialization failed")
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)
        # Report what the driver actually reports back after the request.
        print(f"Camera resolution: {cap.get(cv2.CAP_PROP_FRAME_WIDTH)} x {cap.get(cv2.CAP_PROP_FRAME_HEIGHT)}")
        return cap

    @staticmethod
    def calculate_line_equation(p1: np.ndarray, p2: np.ndarray) -> Tuple[float, float, float]:
        """Return (A, B, C) of the line ``A*x + B*y + C = 0`` through p1 and p2.

        The coefficients are not normalised: ``hypot(A, B)`` equals the
        distance between the two points, and swapping the points flips the
        sign of all three coefficients.
        """
        x1, y1 = p1
        x2, y2 = p2
        A = y2 - y1
        B = x1 - x2
        C = (x2 - x1) * y1 - (y2 - y1) * x1
        return (A, B, C)

    def calculate_parallel_distance(self, line1: Tuple[float, float, float],
                                    line2: Tuple[float, float, float],
                                    tol: float = 1e-6) -> Optional[float]:
        """Return the distance between two parallel lines, or None.

        Args:
            line1: (A, B, C) coefficients of the first line.
            line2: (A, B, C) coefficients of the second line.
            tol: Largest accepted ``|sin(angle)|`` between the two lines for
                them to count as parallel.

        Returns:
            The perpendicular distance, or None when the lines are not
            parallel or either line is degenerate (A == B == 0).
        """
        A1, B1, C1 = line1
        A2, B2, C2 = line2
        norm1 = math.hypot(A1, B1)
        norm2 = math.hypot(A2, B2)
        if norm1 == 0 or norm2 == 0:  # Degenerate line, no direction
            return None
        # Normalised cross product == |sin(angle)|, independent of how long
        # the segments that defined the lines were.
        if abs(A1 * B2 - A2 * B1) / (norm1 * norm2) > tol:  # Not parallel
            return None
        # The two triples generally have different scales (and maybe opposite
        # orientation), so each offset must be normalised by its own norm
        # before the offsets can be compared.
        offset1 = C1 / norm1
        offset2 = C2 / norm2
        if A1 * A2 + B1 * B2 < 0:  # Normals point in opposite directions
            offset2 = -offset2
        return abs(offset2 - offset1)

    @staticmethod
    def point_to_line_distance(point: np.ndarray, line: Tuple[float, float, float]) -> float:
        """Return the perpendicular distance from ``point`` to ``line``.

        ``line`` is an (A, B, C) triple for ``A*x + B*y + C = 0``; A and B
        must not both be zero, otherwise the division below is by zero.
        """
        x, y = point
        A, B, C = line
        return abs(A * x + B * y + C) / math.hypot(A, B)

    def draw_debug_info(self, frame: np.ndarray, row: int = 0) -> None:
        """Draw keypoint labels and edge lengths for the stored keypoints.

        Reads ``self.ll_point`` / ``ul_point`` / ``ur_point`` / ``lr_point``
        and skips anything that is still None.  Draws onto ``frame`` in place.

        Args:
            frame: Image to draw on.
            row: Zero-based slot for the "Lower"/"Upper" readout.  Each slot
                is shifted down by 60 px so that readouts for several
                syringes in one frame do not overprint each other.
        """
        debug_params = {
            'fontFace': cv2.FONT_HERSHEY_SIMPLEX,
            'fontScale': 0.6,
            'color': (0, 0, 255),
            'thickness': 1
        }
        y_shift = 60 * row
        labelled_points = (
            ('LL', self.ll_point),
            ('UL', self.ul_point),
            ('UR', self.ur_point),
            ('LR', self.lr_point),
        )
        for label, pt in labelled_points:
            if pt is not None:
                # Offset the label 10 px to the right of the keypoint.
                cv2.putText(frame, label, (int(pt[0]) + 10, int(pt[1])), **debug_params)
        if self.ll_point is not None and self.lr_point is not None:
            lower_len = np.linalg.norm(self.lr_point - self.ll_point)
            cv2.putText(frame, f"Lower: {lower_len:.1f}px", (10, 100 + y_shift), **debug_params)
        if self.ul_point is not None and self.ur_point is not None:
            upper_len = np.linalg.norm(self.ur_point - self.ul_point)
            cv2.putText(frame, f"Upper: {upper_len:.1f}px", (10, 130 + y_shift), **debug_params)

    def draw_volume_table(self, frame: np.ndarray, volumes: list, table_x: int, table_y: int, track_id: int) -> None:
        """Draw a track-ID line and a diameter/volume table onto ``frame``.

        Args:
            frame: Image to draw on (modified in place).
            volumes: ``(diameter, volume)`` pairs; a volume of None is shown
                as "N/A".
            table_x: Left edge of the table in pixels.
            table_y: Top edge of the table in pixels.
            track_id: Tracker ID printed on the first line of the table.
        """
        table_width = 250
        row_height = 30
        # ID line + header occupy the first 80 px, then one row per entry
        # (200 px in total for four diameters).
        table_height = 80 + row_height * len(volumes)
        text_style = (cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
        value_x = table_x + 150
        # Draw light gray background
        cv2.rectangle(frame, (table_x, table_y), (table_x + table_width, table_y + table_height), (220, 220, 220), -1)
        # Draw track ID on the first line
        cv2.putText(frame, f"Syringe ID: {track_id}", (table_x + 10, table_y + 20), *text_style)
        # Draw headers
        cv2.putText(frame, "Diameter", (table_x + 10, table_y + 50), *text_style)
        cv2.putText(frame, "mL", (value_x, table_y + 50), *text_style)
        # Draw rows
        for i, (D, volume) in enumerate(volumes):
            y = table_y + 80 + i * row_height
            cv2.putText(frame, f"{D:.2f}", (table_x + 10, y), *text_style)
            volume_text = f"{volume:.2f}" if volume is not None else "N/A"
            cv2.putText(frame, volume_text, (value_x, y), *text_style)

    def _estimate_volumes(self, width_px: float, height_px: float) -> list:
        """Return ``(diameter, volume or None)`` for every candidate diameter.

        ``width_px`` is the barrel width in pixels, so ``diameter / width_px``
        is the cm-per-pixel scale under that diameter hypothesis.
        """
        volumes = []
        for D in self.possible_diameters:
            H_cm = height_px * (D / width_px)
            if 0 < H_cm <= 30:  # Validate height (max 30 cm)
                volume = math.pi * (D / 2) ** 2 * H_cm
            else:
                volume = None
            volumes.append((D, volume))
        return volumes

    def process_frame(self, frame: np.ndarray) -> np.ndarray:
        """Track syringes in ``frame`` and draw a volume table beside each one.

        Detections without a track ID or without four keypoints are skipped.

        Returns:
            The annotated copy produced by ``result.plot()`` with tables and
            debug text drawn on it, or ``frame`` itself when the tracker
            returns no results.
        """
        # Run YOLO model with ByteTrack tracking
        results = self.model.track(source=frame, persist=True, tracker='bytetrack.yaml', verbose=False, conf=0.6)
        if not results:
            return frame

        result = results[0]
        annotated_frame = result.plot()  # Draw bounding boxes with track IDs
        keypoints = result.keypoints
        debug_row = 0  # Next free slot for the per-syringe debug readout

        # Process each detected syringe
        for i, box in enumerate(result.boxes):
            # Detections that have no track ID are ignored.
            if box.id is None:
                continue
            track_id = int(box.id)

            # Only the right edge and the top of the box are needed to place the table.
            _, y1, x2, _ = map(int, box.xyxy[0].cpu().numpy())

            # Verify keypoints availability
            if keypoints is None or len(keypoints.xy) <= i or len(keypoints.xy[i]) < 4:
                continue

            # Extract keypoints
            try:
                kpts = keypoints.xy[i].cpu().numpy()
                ll_point, ul_point, ur_point, lr_point = kpts[:4]  # Lower-left, upper-left, upper-right, lower-right
            except Exception as e:
                print(f"Error extracting keypoints for syringe {track_id}: {e}")
                continue

            # Calculate volumes.  Any failure is reported and only this
            # detection is dropped, so the video loop keeps running.
            try:
                # Width: mean of lower/upper edges; height: mean of left/right edges.
                width_pixels = (np.linalg.norm(lr_point - ll_point) + np.linalg.norm(ur_point - ul_point)) / 2
                height_pixels = (np.linalg.norm(ul_point - ll_point) + np.linalg.norm(ur_point - lr_point)) / 2
                if width_pixels <= 0 or height_pixels <= 0:
                    continue

                # Calculate volumes for each diameter
                volumes = self._estimate_volumes(width_pixels, height_pixels)

                # Table goes 10 px right of the box, aligned with its top.
                self.draw_volume_table(annotated_frame, volumes, x2 + 10, y1, track_id)

                # Store the keypoints so draw_debug_info can read them, and give
                # every syringe its own readout slot to avoid overprinting.
                self.ll_point, self.ul_point, self.ur_point, self.lr_point = ll_point, ul_point, ur_point, lr_point
                self.draw_debug_info(annotated_frame, row=debug_row)
                debug_row += 1

            except Exception as e:
                print(f"Error processing syringe {track_id}: {e}")
                continue

        return annotated_frame

    def run(self) -> None:
        """Read frames from the camera and display results until 'q' is pressed.

        The loop also ends when a frame cannot be read.  The camera and all
        OpenCV windows are released on exit, including on an exception.
        """
        cap = self.initialize_camera()
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                processed_frame = self.process_frame(frame)
                cv2.imshow('Syringe Volume Measurement', processed_frame)
                # Mask to the low byte before comparing with the key code.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            cap.release()
            cv2.destroyAllWindows()

if __name__ == "__main__":
    # Constructing the calculator loads the YOLO weights; run() then blocks
    # in the tracking webcam loop until 'q' is pressed or a frame cannot be read.
    calculator = SyringeVolumeCalculator()
    calculator.run()



Camera resolution: 1920.0 x 1440.0


2025-03-24 23:00:32.219 python[72855:3782885] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-24 23:00:32.219 python[72855:3782885] +[IMKInputSession subclass]: chose IMKInputSession_Modern
