In [9]:
from PIL import Image
import cv2
import numpy as np
from ultralytics import YOLO
import os

class ImageCropper:
    """Crop YOLO detections out of an image and save them as JPEG files.

    For every detected box two crops are written to the output directory:
    a generously padded ``backup_*`` crop and a tightly padded ``photo_*``
    crop.
    """

    # JPEG quality used when writing the crops.
    JPEG_QUALITY = 95

    def __init__(self, model_path: str):
        """Load the YOLO model stored at *model_path*."""
        self.model = YOLO(model_path)

    @staticmethod
    def _padded_box(box, ratio, img_w, img_h):
        """Return *box* grown by *ratio* of its size, clamped to the image.

        Args:
            box: ``(x1, y1, x2, y2)`` integer pixel coordinates.
            ratio: Fraction of the box width/height added on each side.
            img_w: Image width in pixels (upper bound for x).
            img_h: Image height in pixels (upper bound for y).

        Returns:
            The padded ``(x1, y1, x2, y2)`` tuple.
        """
        x1, y1, x2, y2 = box
        # Truncate toward zero, matching the pixel-index arithmetic used
        # for slicing below.
        pad_x = int((x2 - x1) * ratio)
        pad_y = int((y2 - y1) * ratio)
        return (
            max(0, x1 - pad_x),
            max(0, y1 - pad_y),
            min(img_w, x2 + pad_x),
            min(img_h, y2 + pad_y),
        )

    def process_image(
        self,
        image_path: str,
        output_dir: str,
        classes=(1,),
        *,
        conf: float = 0.2,
        backup_padding: float = 0.15,
        photo_padding: float = 0.03,
    ):
        """Detect objects in an image and save padded crops of each one.

        Writes ``backup_<result>_<box>.jpg`` (padded by *backup_padding*)
        and ``photo_<result>_<box>.jpg`` (padded by *photo_padding*) for
        every detection, plus ``boxes_visualization.jpg``.

        Args:
            image_path: Path of the image to process.
            output_dir: Directory for the output files; created if missing.
            classes: Class indices forwarded to the model as its
                ``classes`` filter.
            conf: Confidence threshold forwarded to the model.
            backup_padding: Per-side padding of the ``backup_*`` crop, as a
                fraction of the box size.
            photo_padding: Per-side padding of the ``photo_*`` crop, as a
                fraction of the box size.

        Raises:
            FileNotFoundError: If *image_path* does not exist.
            OSError: If the file exists but cannot be decoded as an image.
        """
        # cv2.imread signals failure by returning None instead of raising,
        # so check explicitly rather than failing later on `.shape`.
        cv_image = cv2.imread(image_path)
        if cv_image is None:
            if not os.path.isfile(image_path):
                raise FileNotFoundError(f"Image not found: {image_path}")
            raise OSError(f"Could not decode image: {image_path}")

        os.makedirs(output_dir, exist_ok=True)

        img_h, img_w = cv_image.shape[:2]
        jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), self.JPEG_QUALITY]

        # The immutable tuple default is converted back to a list; any
        # other value the caller passes is forwarded unchanged.
        class_filter = list(classes) if isinstance(classes, tuple) else classes
        results = self.model(image_path, classes=class_filter, conf=conf)

        crop_specs = (("backup", backup_padding), ("photo", photo_padding))
        for idx, result in enumerate(results, start=1):
            for box_idx, box in enumerate(result.boxes.xyxy, start=1):
                coords = tuple(map(int, box.tolist()))
                for prefix, ratio in crop_specs:
                    left, top, right, bottom = self._padded_box(
                        coords, ratio, img_w, img_h
                    )
                    crop = cv_image[top:bottom, left:right]
                    if crop.size == 0:
                        # A degenerate box yields an empty crop, which
                        # cv2.imwrite cannot encode; skip it.
                        continue
                    output_path = os.path.join(
                        output_dir, f"{prefix}_{idx}_{box_idx}.jpg"
                    )
                    cv2.imwrite(output_path, crop, jpeg_params)

        # No overlays are drawn on cv_image, so this file is currently an
        # unannotated copy of the input image.
        viz_path = os.path.join(output_dir, "boxes_visualization.jpg")
        cv2.imwrite(viz_path, cv_image)

def main():
    """Crop the detections of class index 0 from one sample form image."""
    source_image = "RPL_processed_photo_processed/Plumber/bijaya/form.jpg"
    destination = "output_dir"
    person_classes = [0]  # Person class

    ImageCropper("yolo11x.pt").process_image(
        image_path=source_image,
        output_dir=destination,
        classes=person_classes,
    )

# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


image 1/1 /home/amit/Projects/CTEVT/Form_Processing/RPL_processed_photo_processed/Plumber/bijaya/form.jpg: 640x480 2 persons, 932.3ms
Speed: 3.9ms preprocess, 932.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 480)
