In [1]:
!pip install pycocotools --quiet
!git clone https://github.com/pytorch/vision.git
!git checkout v0.3.0

!cp vision/references/detection/utils.py ./

!cp vision/references/detection/transforms.py ./
!cp vision/references/detection/coco_eval.py ./
!cp vision/references/detection/engine.py ./
!cp vision/references/detection/coco_utils.py ./

Cloning into 'vision'...
remote: Enumerating objects: 590884, done.[K
remote: Counting objects: 100% (1162/1162), done.[K
remote: Compressing objects: 100% (875/875), done.[K
remote: Total 590884 (delta 989), reused 288 (delta 287), pack-reused 589722 (from 7)[K
Receiving objects: 100% (590884/590884), 1.12 GiB | 40.98 MiB/s, done.
Resolving deltas: 100% (552045/552045), done.
fatal: not a git repository (or any parent up to mount point /kaggle)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).


In [2]:
import os
import shutil
import logging
import random
import warnings
import time
import io
import sys
from tqdm import tqdm
from pathlib import Path

# Data handling and transformations
import cv2
import numpy as np
from PIL import Image
import xml.etree.ElementTree as ET

# Machine learning and deep learning libraries
import torch
import torchvision
from torchvision import transforms as torchtrans  
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

# For image augmentations
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

# Custom libraries
import utils
import transforms as T
import engine 

  check_for_updates()


In [3]:
data_dir = "/kaggle/input/sh17-dataset-for-ppe-detection"
output_dir = "/kaggle/working/data"

train_txt = os.path.join(data_dir, "train_files.txt")
val_txt = os.path.join(data_dir, "val_files.txt")

os.makedirs(os.path.join(output_dir, "train", "images"), exist_ok=True)
os.makedirs(os.path.join(output_dir, "train", "labels"), exist_ok=True)
os.makedirs(os.path.join(output_dir, "val", "images"), exist_ok=True)
os.makedirs(os.path.join(output_dir, "val", "labels"), exist_ok=True)
os.makedirs(os.path.join(output_dir, "test", "images"), exist_ok=True)
os.makedirs(os.path.join(output_dir, "test", "labels"), exist_ok=True)

# Read file paths
with open(train_txt, "r") as f:
    train_files = f.read().splitlines()

with open(val_txt, "r") as f:
    test_files = f.read().splitlines()

# Split train into train and val (80:20)
train_files, val_files = train_test_split(train_files, test_size=0.2, random_state=42, shuffle=True)
print(f"Train: {len(train_files)} | Val: {len(val_files)} | Test: {len(test_files)}")

# Copy files to respective folders
def copy_files(file_list, src_images, src_labels, dst_images, dst_labels):
    for file in tqdm(file_list):
        image_file = os.path.join(src_images, file)
        label_file = os.path.join(src_labels, file.split(".")[0] + ".xml")

        # Copy images and labels
        if os.path.exists(image_file):
            shutil.copy(image_file, dst_images)
        else:
            print(f"Image not found: {image_file}")
            
        if os.path.exists(label_file):
            shutil.copy(label_file, dst_labels)
        else:
            print(f"Label not found: {label_file}")

# Define source and destination folders
src_images = os.path.join(data_dir, "images")
src_labels = os.path.join(data_dir, "voc_labels")

print("Copying train files...")
copy_files(train_files, src_images, src_labels,
           os.path.join(output_dir, "train", "images"),
           os.path.join(output_dir, "train", "labels"))

print("Copying val files...")
copy_files(val_files, src_images, src_labels,
           os.path.join(output_dir, "val", "images"),
           os.path.join(output_dir, "val", "labels"))

print("Copying test files...")
copy_files(test_files, src_images, src_labels,
           os.path.join(output_dir, "test", "images"),
           os.path.join(output_dir, "test", "labels"))

print("Data prepared!")

Train: 5183 | Val: 1296 | Test: 1620
Copying train files...


100%|██████████| 5183/5183 [02:28<00:00, 34.85it/s]


Copying val files...


100%|██████████| 1296/1296 [00:37<00:00, 34.79it/s]


Copying test files...


100%|██████████| 1620/1620 [00:47<00:00, 33.78it/s]

Data prepared!





In [4]:
# ========================================
# Check and update invalid bounding boxes
# ========================================

class BBoxValidator:
    def __init__(self, data_dir):
        """
        Initialize the BBoxValidator with the directory containing XML files.

        Parameters:
            data_dir: Path to the base directory containing train, val, and test folders.
        """
        self.data_dir = data_dir

    def validate_bbox(self, file_path):
        """
        Validate bounding boxes in a given XML file.

        Parameters:
            file_path (str): Path to the XML file.

        Returns:
            list: A list of invalid bounding boxes, each represented as a tuple (xmin, xmax, ymin, ymax).
        """
        invalid_bboxes = []
        try:
            tree = ET.parse(file_path)
            root = tree.getroot()
            size = root.find('size')
            image_width = int(size.find('width').text)
            image_height = int(size.find('height').text)

            for obj in root.findall('object'):
                bndbox = obj.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                xmax = int(bndbox.find('xmax').text)
                ymin = int(bndbox.find('ymin').text)
                ymax = int(bndbox.find('ymax').text)

                # Check for invalid bounding box conditions
                if xmin < 0 or ymin < 0 or xmax > image_width or ymax > image_height or xmin >= xmax or ymin >= ymax:
                    invalid_bboxes.append((xmin, xmax, ymin, ymax))
        except Exception as e:
            print(f"Error validating {file_path}: {e}")
        
        return invalid_bboxes

    def find_invalid_files(self, labels_dir):
        """
        Find all XML files with invalid bounding boxes in a given directory.

        Parameters:
            labels_dir (str): Path to the directory containing XML label files.

        Returns:
            list: A list of tuples (file_name, invalid_bboxes).
        """
        invalid_files = []
        for file_name in os.listdir(labels_dir):
            if file_name.endswith('.xml'):
                file_path = os.path.join(labels_dir, file_name)
                invalid_bboxes = self.validate_bbox(file_path)
                if invalid_bboxes:
                    invalid_files.append((file_name, invalid_bboxes))
        return invalid_files

    def update_invalid_bboxes(self, labels_dir):
        """
        Update invalid bounding boxes in all XML files within the given directory.

        Parameters:
            labels_dir (str): Path to the directory containing XML label files.
        """
        for file_name in os.listdir(labels_dir):
            if file_name.endswith('.xml'):
                file_path = os.path.join(labels_dir, file_name)
                try:
                    tree = ET.parse(file_path)
                    root = tree.getroot()
                    size = root.find('size')
                    image_width = int(size.find('width').text)
                    image_height = int(size.find('height').text)

                    updated = False

                    for obj in root.findall('object'):
                        bbox = obj.find('bndbox')
                        if bbox is None:
                            continue

                        xmin = int(bbox.find('xmin').text)
                        ymin = int(bbox.find('ymin').text)
                        xmax = int(bbox.find('xmax').text)
                        ymax = int(bbox.find('ymax').text)

                        # Correct invalid bounding boxes
                        if xmin < 0:
                            bbox.find('xmin').text = '0'
                            updated = True
                        if ymin < 0:
                            bbox.find('ymin').text = '0'
                            updated = True
                        if xmax > image_width:
                            bbox.find('xmax').text = str(image_width)
                            updated = True
                        if ymax > image_height:
                            bbox.find('ymax').text = str(image_height)
                            updated = True
                        if xmin >= xmax:
                            bbox.find('xmax').text = str(xmin + 1)
                            updated = True
                        if ymin >= ymax:
                            bbox.find('ymax').text = str(ymin + 1)
                            updated = True

                    if updated:
                        tree.write(file_path)
                        print(f"    => Updated invalid bounding boxes in {file_path}")

                except Exception as e:
                    print(f"Error processing file {file_path}: {e}")
    
    def process_all_folders(self):
        """
        Validate and update bounding boxes in train, val, and test label folders.
        """
        for folder in ['train', 'val', 'test']:
            labels_dir = os.path.join(self.data_dir, folder, 'labels')
            print(f"\nProcessing {labels_dir}...")

            # Find and print invalid files
            invalid_files = self.find_invalid_files(labels_dir)
            if invalid_files:
                print(f"    Files with invalid bounding boxes in {folder}:")
                for file_name, bboxes in invalid_files:
                    print(f"        File: {file_name}")
                    for bbox in bboxes:
                        print(f"              Invalid BBox: {bbox}")
            else:
                print(f"    No invalid bounding boxes found in {folder}.")

            # Update invalid bounding boxes
            self.update_invalid_bboxes(labels_dir)

In [5]:
data_dir = '/kaggle/working/data'
validator = BBoxValidator(data_dir)
validator.process_all_folders()


Processing /kaggle/working/data/train/labels...
    Files with invalid bounding boxes in train:
        File: pexels-photo-259265.xml
              Invalid BBox: (1559, 3509, 123, 1358)
        File: pexels-photo-258626.xml
              Invalid BBox: (1725, 1874, 4430, 4664)
    => Updated invalid bounding boxes in /kaggle/working/data/train/labels/pexels-photo-259265.xml
    => Updated invalid bounding boxes in /kaggle/working/data/train/labels/pexels-photo-258626.xml

Processing /kaggle/working/data/val/labels...
    No invalid bounding boxes found in val.

Processing /kaggle/working/data/test/labels...
    No invalid bounding boxes found in test.


In [6]:
# ========================================
# Class Custom Dataset
# ========================================
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, root, class_mapping, transforms=None):
        self.root = root
        self.class_mapping = class_mapping
        self.transforms = transforms
        self.images = sorted(os.listdir(os.path.join(root, "images")))  # Sort images
        self.labels = sorted(os.listdir(os.path.join(root, "labels")))  # Sort labels

        # Ensure the same number of images and labels
        assert len(self.images) == len(self.labels), "Mismatch between number of images and labels"
        
        # Optionally ensure filenames (without extensions) match between images and labels
        for img, label in zip(self.images, self.labels):
            assert img.split('.')[0] == label.split('.')[0], f"Image {img} and label {label} do not match"

    def __len__(self):
        return len(self.images)
        
    def __getitem__(self, idx):
        img_path = os.path.join(self.root, "images", self.images[idx])
        label_path = os.path.join(self.root, "labels", self.labels[idx])
        img = Image.open(img_path).convert("RGB")
        tree = ET.parse(label_path)
        root = tree.getroot()

        boxes = []
        labels = []
        img_width, img_height = img.size
        for obj in root.findall("object"):
            bbox = obj.find("bndbox")
            xmin = int(bbox.find("xmin").text)
            ymin = int(bbox.find("ymin").text)
            xmax = int(bbox.find("xmax").text)
            ymax = int(bbox.find("ymax").text)
            boxes.append([xmin, ymin, xmax, ymax])
            name = obj.find("name").text
            
            if name in self.class_mapping:
                labels.append(self.class_mapping[name])
            else:
                raise ValueError(f"Unknown label '{name}' in {label_path}")

        # Convert boxes to numpy array for albumentations (no manual normalization here)
        boxes = np.array(boxes, dtype=np.float32)
        
        # Apply transformations if provided
        if self.transforms:
            # albumentations requires input to be a dictionary with 'image' and 'bboxes'
            augmented = self.transforms(image=np.array(img), bboxes=boxes, labels=labels)
            img = augmented['image']
            boxes = augmented['bboxes']

        # Convert boxes to tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        # Calculate area of each box
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # Suppose all instances are not crowd
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        # Prepare the target dictionary
        target = {
            "boxes": boxes,
            "labels": labels,
            "area": area,
            "iscrowd": iscrowd,
            "image_id": int(idx)
        }

        return img, target



# =================================
# Data Loaders
# =================================
def collate_fn(batch):
    return tuple(zip(*batch))
    
def get_dataloaders(data_dir, class_mapping, batch_size=4):
    # Define albumentations transformations
    image_transforms = A.Compose(
        [
            A.Resize(640, 640),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0),
            ToTensorV2(),
        ],
        bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels'])
    )
    


    # Create dataset instances
    train_dataset = CustomDataset(os.path.join(data_dir, "train"), class_mapping, transforms=image_transforms)
    val_dataset = CustomDataset(os.path.join(data_dir, "val"), class_mapping, transforms=image_transforms)
    test_dataset = CustomDataset(os.path.join(data_dir, "test"), class_mapping, transforms=image_transforms)

    # Create data loaders
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

    return train_loader, val_loader, test_loader

In [7]:
import torch
torch.cuda.empty_cache()

In [8]:
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
from engine import train_one_epoch, evaluate

# ========================
# Logging Configuration
# ========================
LOG_FILE = "/kaggle/working/results/log.txt"
Path(LOG_FILE).parent.mkdir(parents=True, exist_ok=True)

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s",
                    handlers=[
                        logging.FileHandler(LOG_FILE, mode='w'),
                        logging.StreamHandler()  # Display on terminal
                    ])
logger = logging.getLogger()


# ========================
# Define Class Mapping
# ========================
class_mapping = {
    "__background__": 0,
    "person": 1,
    "ear": 2,
    "ear-mufs": 3,
    "face": 4,
    "face-guard": 5,
    "face-mask-medical": 6,
    "foot": 7,
    "tools": 8,
    "glasses": 9,
    "gloves": 10,
    "helmet": 11,
    "hands": 12,
    "head": 13,
    "medical-suit": 14,
    "shoes": 15,
    "safety-suit": 16,
    "safety-vest": 17
}


# ========================
# Get Model
# ========================
def get_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="COCO_V1")
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    print("Model Done!")
    return model

# # ========================
# # Main Script
# # ========================
# def main(data_dir):
    
#     batch_size = 16
#     num_classes = len(class_mapping)
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     print("Device:", device)
    
#     # Load Data
#     train_loader, val_loader, test_loader = get_dataloaders(data_dir, class_mapping, batch_size)
    
#     # Initialize Model
#     model = get_model(num_classes)
#     model.to(device)
    
#     # Optimizer
#     optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
    
#     # Evaluation
#     logger.info("Starting evaluation...")
#     train_one_epoch(model, optimizer, train_loader, device, 0, print_freq=20)
#     metric = evaluate(model, val_loader, device)
#     logger.info(f"Evaluation results: {metric}")
#     print(metric)

# if __name__=='__main__':
#     data_dir = "/kaggle/working/data"
#     main(data_dir)

In [9]:
# ========================
# Main Script
# ========================
def main(data_dir, num_epochs=10):
    batch_size = 16
    num_classes = len(class_mapping)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device:", device)
    
    # Load Data
    train_loader, val_loader, test_loader = get_dataloaders(data_dir, class_mapping, batch_size)
    
    # Initialize Model
    model = get_model(num_classes)
    model.to(device)
    
    # Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)

    # Training Loop
    for epoch in range(num_epochs):
        logger.info(f"Starting epoch {epoch + 1}/{num_epochs}")
        
        # Train One Epoch
        train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=20)
        
        # Evaluate on Validation Set
        logger.info("Evaluating on validation set...")
        metrics = evaluate(model, val_loader, device)
        logger.info(f"Validation metrics after epoch {epoch + 1}: {metrics}")

    # Final Evaluation on Test Set
    logger.info("Evaluating on test set...")
    final_metrics = evaluate(model, test_loader, device)
    logger.info(f"Final evaluation metrics: {final_metrics}")
    print(final_metrics)

if __name__ == '__main__':
    data_dir = "/kaggle/working/data"
    num_epochs = 10
    main(data_dir, num_epochs)

Device: cuda


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 209MB/s]


Model Done!


  with torch.cuda.amp.autocast(enabled=scaler is not None):


Epoch: [0]  [  0/324]  eta: 0:52:25  lr: 0.000041  loss: 3.7570 (3.7570)  loss_classifier: 2.5940 (2.5940)  loss_box_reg: 0.2883 (0.2883)  loss_objectness: 0.6832 (0.6832)  loss_rpn_box_reg: 0.1914 (0.1914)  time: 9.7074  data: 6.8582  max mem: 11366
Epoch: [0]  [ 20/324]  eta: 0:39:38  lr: 0.000660  loss: 1.8621 (2.1189)  loss_classifier: 0.8595 (1.1714)  loss_box_reg: 0.3967 (0.3832)  loss_objectness: 0.4055 (0.4064)  loss_rpn_box_reg: 0.1618 (0.1579)  time: 7.7307  data: 6.2731  max mem: 11523
Epoch: [0]  [ 40/324]  eta: 0:36:38  lr: 0.001278  loss: 1.4120 (1.7787)  loss_classifier: 0.6300 (0.9140)  loss_box_reg: 0.4501 (0.4149)  loss_objectness: 0.1888 (0.3015)  loss_rpn_box_reg: 0.1357 (0.1483)  time: 7.6550  data: 6.2017  max mem: 11523
Epoch: [0]  [ 60/324]  eta: 0:33:41  lr: 0.001897  loss: 1.2690 (1.6202)  loss_classifier: 0.5492 (0.7929)  loss_box_reg: 0.4625 (0.4305)  loss_objectness: 0.1270 (0.2540)  loss_rpn_box_reg: 0.1356 (0.1427)  time: 7.4814  data: 6.0272  max mem: 11