In [1]:
import torchvision, torch, os, tqdm
from constants import *
from PIL import Image, ImageDraw

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True).to(device)
model.eval()

class BoundindBox:
    def __init__(self, x1, y1, x2, y2, confidence=float, class_id=None):
        self.x1, self.y1, self.x2, self.y2 = x1, y1, x2, y2
        self.confidence = confidence
        self.class_id = class_id

    def __repr__(self):
        return f"BoundindBox({self.x1}, {self.y1}, {self.x2}, {self.y2}, {self.confidence}, {self.class_id})"

def calculate_boxes(file_path: str) -> list[BoundindBox]:
    if not file_path.endswith('.jpg'):
        print(f"Skipping {file_path}")
        return []
    
    # get image and convert to black and white
    image = Image.open(file_path).convert('RGB')
    image = torchvision.transforms.ToTensor()(image).to(device)
    prediction = model([image])
    boxes = prediction[0]['boxes']
    
    return [BoundindBox(*box, confidence=confidence, class_id=class_id) for box, confidence, class_id in zip(boxes, prediction[0]['scores'], prediction[0]['labels'])]




In [14]:
# get all folders in PHOTOS_FOLDER 
folders = os.listdir(PHOTOS_FOLDER)[:10]

# calculate boxes for each image in each folder, export to PROCESSED_PHOTOS_FOLDER with the same structure
for folder in folders:
    folder_path = os.path.join(PHOTOS_FOLDER, folder)
    if not os.path.isdir(folder_path):
        continue

    processed_folder_path = os.path.join(PROCESSED_PHOTOS_FOLDER, folder)
    os.makedirs(processed_folder_path, exist_ok=True)

    for file in tqdm.tqdm(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        boxes = calculate_boxes(file_path)
        image = Image.open(file_path)
        draw = ImageDraw.Draw(image)
        for box in boxes:
            # only get cat
            if box.class_id != 17:
                continue
            if box.confidence < 0.5:
                continue
            # draw box, the higher the confidence, the thicker the box
            draw.rectangle([box.x1, box.y1, box.x2, box.y2], outline='red', width=int(box.confidence * 10))
            
        image.save(os.path.join(processed_folder_path, file))

100%|██████████| 20/20 [00:04<00:00,  4.24it/s]
100%|██████████| 20/20 [00:04<00:00,  4.80it/s]
 35%|███▌      | 7/20 [00:01<00:03,  4.02it/s]


OSError: cannot write mode RGBA as JPEG