In [5]:
from PIL import Image
import cv2
import numpy as np
from ultralytics import YOLO
import os

class ImageCropper:
    def __init__(self, model_path: str):
        self.model = YOLO(model_path)

    def process_image(self, image_path: str, output_dir: str, classes=[1]):
        # Create output directory
        os.makedirs(output_dir, exist_ok=True)
        
        # Load image with cv2 for visualization
        cv_image = cv2.imread(image_path)
        # Load image with PIL for cropping
        pil_image = Image.open(image_path)
        
        # Run detection
        results = self.model(image_path, classes=classes, conf=0.2)
        
        # Process each detection
        for idx, result in enumerate(results):
            # result.show()
            boxes = result.boxes.xyxy
            
            for box_idx, box in enumerate(boxes):
                # Get original box coordinates and convert to integers
                x1, y1, x2, y2 = map(int, box.tolist())
                
                # Draw original box in red (BGR format)
                # cv2.rectangle(cv_image, (x1, y1), (x2, y2), (0, 0, 255), 2)
                
                # Calculate padded coordinates
                width = x2 - x1
                height = y2 - y1
                padded_x1 = max(0, x1 - int(width * 0.1))
                padded_y1 = max(0, y1 - int(height * 0.1))
                padded_x2 = min(cv_image.shape[1], x2 + int(width * 0.1))
                padded_y2 = min(cv_image.shape[0], y2 + int(height * 0.1))
                
                # Draw padded box in green (BGR format)
                # cv2.rectangle(cv_image, (padded_x1, padded_y1), (padded_x2, padded_y2), (0, 255, 0), 2)
                
                # Add labels with better positioning and background
                # Original box label
                # label = 'Original'
                # (label_w, label_h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
                # cv2.rectangle(cv_image, (x1, y1 - 20), (x1 + label_w, y1), (0, 0, 255), -1)
                # cv2.putText(cv_image, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
                
                # Padded box label
                # label = 'Padded'
                # (label_w, label_h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
                # cv2.rectangle(cv_image, (padded_x1, padded_y1 - 20), (padded_x1 + label_w, padded_y1), (0, 255, 0), -1)
                # cv2.putText(cv_image, label, (padded_x1, padded_y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
                
                # Crop and save using padded coordinates (using PIL for consistency)
                cropped = cv_image[padded_y1:padded_y2, padded_x1:padded_x2]

                output_path = os.path.join(output_dir, f"crop_{idx + 1}_{box_idx + 1}.jpg")
                cv2.imwrite(output_path, cropped, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
        
        # Save the visualization
        viz_path = os.path.join(output_dir, "boxes_visualization.jpg")
        cv2.imwrite(viz_path, cv_image)

def main():
    cropper = ImageCropper("yolo11x.pt")
    cropper.process_image(
        image_path="RPL_processed_photo_processed/Plumber/bijaya/form.jpg",
        output_dir="output_dir",
        classes=[0]  # Person class
    )

if __name__ == "__main__":
    main()


image 1/1 /home/amit/Projects/CTEVT/Form_Processing/RPL_processed_photo_processed/Plumber/bijaya/form.jpg: 640x480 2 persons, 974.6ms
Speed: 4.7ms preprocess, 974.6ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)


In [10]:
from PIL import Image
import cv2
import numpy as np
from ultralytics import YOLO
import os
import math

class ImageCropper:
    def __init__(self, model_path: str):
        self.model = YOLO(model_path)

    def rotate_box(self, image, box, angle_deg):
        """Rotate a bounding box around its center."""
        # Convert angle to radians
        angle_rad = math.radians(angle_deg)
        
        # Get box center
        cx = (box[0] + box[2]) / 2
        cy = (box[1] + box[3]) / 2
        
        # Get box width and height
        width = box[2] - box[0]
        height = box[3] - box[1]
        
        # Create rotation matrix
        rotation_matrix = cv2.getRotationMatrix2D((cx, cy), angle_deg, 1.0)
        
        # Get corners of the box
        corners = np.array([
            [box[0], box[1]],
            [box[2], box[1]],
            [box[2], box[3]],
            [box[0], box[3]]
        ])
        
        # Rotate corners
        ones = np.ones(shape=(len(corners), 1))
        corners_ones = np.hstack([corners, ones])
        rotated_corners = rotation_matrix.dot(corners_ones.T).T
        
        return rotated_corners

    def process_image(self, image_path: str, output_dir: str, classes=[1]):
        os.makedirs(output_dir, exist_ok=True)
        
        # Load images
        cv_image = cv2.imread(image_path)
        pil_image = Image.open(image_path)
        
        # Run detection
        results = self.model(image_path, classes=classes, conf=0.2)
        
        for idx, result in enumerate(results):
            # Get both regular boxes and rotation angles
            boxes = result.boxes.xyxy.cpu().numpy()
            # Check if the model outputs rotation angles
            if hasattr(result.boxes, 'ang'):
                angles = result.boxes.ang.cpu().numpy()
            else:
                angles = np.zeros(len(boxes))  # Default to 0 if no angle information
            
            for box_idx, (box, angle) in enumerate(zip(boxes, angles)):
                x1, y1, x2, y2 = map(int, box.tolist())
                
                # Get rotated corners
                rotated_corners = self.rotate_box(cv_image, box, angle)
                
                # Draw original oriented box in red
                corners_int = rotated_corners.astype(np.int32)
                cv2.polylines(cv_image, [corners_int], True, (0, 0, 255), 2)
                
                # Calculate padded box with rotation
                width = x2 - x1
                height = y2 - y1
                padding_x = int(width * 0.1)
                padding_y = int(height * 0.1)
                
                padded_box = [
                    x1 - padding_x,
                    y1 - padding_y,
                    x2 + padding_x,
                    y2 + padding_y
                ]
                
                # Get padded rotated corners
                padded_rotated_corners = self.rotate_box(cv_image, padded_box, angle)
                
                # Draw padded oriented box in green
                padded_corners_int = padded_rotated_corners.astype(np.int32)
                cv2.polylines(cv_image, [padded_corners_int], True, (0, 255, 0), 2)
                
                # Create a rotation matrix for the crop
                center = ((x1 + x2) // 2, (y1 + y2) // 2)
                rotation_matrix = cv2.getRotationMatrix2D(center, -angle, 1.0)
                
                # Rotate the image
                rotated_image = cv2.warpAffine(
                    cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR),
                    rotation_matrix,
                    (cv_image.shape[1], cv_image.shape[0])
                )
                
                # Crop the rotated image
                padded_crop = rotated_image[
                    max(0, int(y1 - padding_y)):min(rotated_image.shape[0], int(y2 + padding_y)),
                    max(0, int(x1 - padding_x)):min(rotated_image.shape[1], int(x2 + padding_x))
                ]
                
                # Save the cropped image
                output_path = os.path.join(output_dir, f"crop_{idx + 1}_{box_idx + 1}.jpg")
                cv2.imwrite(output_path, padded_crop)
                
                # Add labels
                self._add_label(cv_image, rotated_corners[0], "Original", (0, 0, 255))
                self._add_label(cv_image, padded_rotated_corners[0], "Padded", (0, 255, 0))
        
        # # Save visualization
        # viz_path = os.path.join(output_dir, "boxes_visualization.jpg")
        # cv2.imwrite(viz_path, cv_image)

    def _add_label(self, image, position, text, color):
        """Helper method to add labels with background."""
        x, y = map(int, position)
        (label_w, label_h), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
        cv2.rectangle(image, (x, y - 20), (x + label_w, y), color, -1)
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

def main():
    cropper = ImageCropper("yolo11x-obb.pt")
    cropper.process_image(
        image_path="RPL_processed_photo_processed/Mason/man_bahadur/form.jpg",
        output_dir="output_dir",
        classes=[0]  # Person class
    )

if __name__ == "__main__":
    main()


image 1/1 /home/amit/Projects/CTEVT/Form_Processing/RPL_processed_photo_processed/Mason/man_bahadur/form.jpg: 1024x672 224346.5ms
Speed: 1362.9ms preprocess, 224346.5ms inference, 70.9ms postprocess per image at shape (1, 3, 1024, 672)


AttributeError: 'NoneType' object has no attribute 'xyxy'