<a href="https://colab.research.google.com/github/durgas4/PPE-detection-using-yolo8/blob/main/PPE_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.2.74-py3-none-any.whl.metadata (41 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/41.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━[0m [32m30.7/41.3 kB[0m [31m711.8 kB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m657.2 kB/s[0m eta [36m0:00:00[0m
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.0-py3-none-any.whl.metadata (8.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


For the following code, try to use cv2.rectangle to draw rectangle around images

### **XML to text conversion and class mapping**

# “pascalVOC_to_yolo.py”

In [6]:
import os
import xml.etree.ElementTree as ET


def load_class_mapping(mapping_file):
    class_mapping = {}
    with open(mapping_file, 'r') as file:
        for idx, line in enumerate(file):
            class_name = line.strip()
            class_mapping[class_name] = idx
    return class_mapping

import os
import xml.etree.ElementTree as ET

def load_class_mapping(mapping_file):
    class_mapping = {}
    with open(mapping_file, 'r') as file:
        for idx, line in enumerate(file):
            class_name = line.strip()
            class_mapping[class_name] = idx
    return class_mapping

def convert_voc_to_yolov8(voc_folder, yolov8_folder, class_mapping_file):
    # Create YOLOv8 folder if it doesn't exist
    if not os.path.exists(yolov8_folder):
        os.makedirs(yolov8_folder)

    # Load class mapping from file
    class_mapping = load_class_mapping(class_mapping_file)

    for filename in os.listdir(voc_folder):
        if filename.endswith(".xml"):
            xml_path = os.path.join(voc_folder, filename)
            tree = ET.parse(xml_path)
            root = tree.getroot()

            # Get image dimensions
            size = root.find("size")
            img_width = float(size.find("width").text)
            img_height = float(size.find("height").text)

            yolo_filename = os.path.splitext(filename)[0] + ".txt"
            with open(os.path.join(yolov8_folder, yolo_filename), "w") as yolo_file:
                for obj in root.findall("object"):
                    class_name = obj.find("name").text
                    if class_name in class_mapping:
                        class_id = class_mapping[class_name]  # Get the class ID from the mapping
                    else:
                        print(f"Warning: Class '{class_name}' not found in mapping file.")
                        continue

                    bbox = obj.find("bndbox")
                    x_min = float(bbox.find("xmin").text)
                    y_min = float(bbox.find("ymin").text)
                    x_max = float(bbox.find("xmax").text)
                    y_max = float(bbox.find("ymax").text)

                    # Calculate bounding box width and height
                    width = x_max - x_min
                    height = y_max - y_min

                    # Calculate normalized coordinates
                    x_center = (x_min + width / 2) / img_width
                    y_center = (y_min + height / 2) / img_height
                    width /= img_width
                    height /= img_height

                    # Write class ID and normalized coordinates to YOLO format
                    yolo_file.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

            # Remove the XML file after conversion
            os.remove(xml_path)

# Example usage
voc_folder = "/content/drive/MyDrive/datasets/labels (1)"
yolov8_folder = "/content/drive/MyDrive/datasets/label"
class_mapping_file = "/content/drive/MyDrive/datasets/classes.txt"

convert_voc_to_yolov8(voc_folder, yolov8_folder, class_mapping_file)


## Train Test split - Person detection

In [None]:
import os
import shutil

def filter_annotations_and_images(source_images_dir, source_labels_dir, target_class_id, output_images_dir, output_labels_dir):
    if not os.path.exists(output_images_dir):
        os.makedirs(output_images_dir)
    if not os.path.exists(output_labels_dir):
        os.makedirs(output_labels_dir)

    label_files = [f for f in os.listdir(source_labels_dir) if f.endswith('.txt')]

    for label_file in label_files:
        label_path = os.path.join(source_labels_dir, label_file)
        image_file = label_file.replace('.txt', '.jpg')  # Adjust extension if needed
        image_path = os.path.join(source_images_dir, image_file)

        with open(label_path, 'r') as file:
            lines = file.readlines()

        filtered_lines = [line for line in lines if int(line.split()[0]) == target_class_id]

        if filtered_lines:
            if os.path.exists(image_path):
                shutil.copy(image_path, output_images_dir)

            new_label_path = os.path.join(output_labels_dir, label_file)
            with open(new_label_path, 'w') as file:
                file.writelines(filtered_lines)

def prepare_dataset(source_images_dir, source_labels_dir, target_class_id, train_images_dir, train_labels_dir, val_images_dir, val_labels_dir):
    for directory in [train_images_dir, train_labels_dir, val_images_dir, val_labels_dir]:
        if not os.path.exists(directory):
            os.makedirs(directory)

    all_label_files = [f for f in os.listdir(source_labels_dir) if f.endswith('.txt')]
    import random
    random.shuffle(all_label_files)
    split_index = int(len(all_label_files) * 0.8)
    train_label_files = all_label_files[:split_index]
    val_label_files = all_label_files[split_index:]

    for label_files, img_output_dir, lbl_output_dir in [(train_label_files, train_images_dir, train_labels_dir), (val_label_files, val_images_dir, val_labels_dir)]:
        for label_file in label_files:
            label_path = os.path.join(source_labels_dir, label_file)
            image_file = label_file.replace('.txt', '.jpg')  # Adjust extension if needed
            image_path = os.path.join(source_images_dir, image_file)

            with open(label_path, 'r') as file:
                lines = file.readlines()

            filtered_lines = [line for line in lines if int(line.split()[0]) == target_class_id]

            if filtered_lines:
                if os.path.exists(image_path):
                    shutil.copy(image_path, img_output_dir)

                new_label_path = os.path.join(lbl_output_dir, label_file)
                with open(new_label_path, 'w') as file:
                    file.writelines(filtered_lines)

# Directories and parameters
# Directories
source_images_dir = '/content/drive/MyDrive/datasets/images'
source_labels_dir = '/content/drive/MyDrive/datasets/labels'
target_class_id = 0  # ID of the class you want to detect

# Output directories for filtered data
train_images_dir = '/content/drive/MyDrive/datasets/person_Detection/train/images'
train_labels_dir = '/content/drive/MyDrive/datasets/person_Detection/train/labels'
val_images_dir = '/content/drive/MyDrive/datasets/person_Detection/val/images'
val_labels_dir = '/content/drive/MyDrive/datasets/person_Detection/val/labels'

# Prepare the dataset
prepare_dataset(source_images_dir, source_labels_dir, target_class_id, train_images_dir, train_labels_dir, val_images_dir, val_labels_dir)


# Training yolo8

In [22]:
# Load YOLOv8n-cls, train it on mnist160 for 10 epochs and predict an image with it
from ultralytics import YOLO
model = YOLO("/content/drive/MyDrive/datasets/person_Detection/yolov8n (1).pt")  # load a pretrained YOLOv8n classification model
model.train(data='/content/drive/MyDrive/datasets/person_Detection/data.yaml', epochs=10)



Ultralytics YOLOv8.2.74 🚀 Python-3.10.12 torch-2.3.1+cu121 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/content/drive/MyDrive/datasets/person_Detection/yolov8n (1).pt, data=/content/drive/MyDrive/datasets/person_Detection/data.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=

[34m[1mtrain: [0mScanning /content/drive/MyDrive/datasets/person_Detection/train/labels... 392 images, 0 backgrounds, 0 corrupt: 100%|██████████| 392/392 [00:05<00:00, 66.92it/s] 


[34m[1mtrain: [0mNew cache created: /content/drive/MyDrive/datasets/person_Detection/train/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /content/drive/MyDrive/datasets/person_Detection/val/labels... 84 images, 0 backgrounds, 0 corrupt: 100%|██████████| 84/84 [00:00<00:00, 102.24it/s]


[34m[1mval: [0mNew cache created: /content/drive/MyDrive/datasets/person_Detection/val/labels.cache
Plotting labels to runs/detect/train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train3[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G     0.9514      1.838      1.166         17        640: 100%|██████████| 25/25 [05:28<00:00, 13.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:22<00:00,  7.54s/it]

                   all         84        288      0.941       0.61      0.811      0.577






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      1.095      1.445      1.243         21        640: 100%|██████████| 25/25 [05:20<00:00, 12.80s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:22<00:00,  7.55s/it]

                   all         84        288       0.94      0.549      0.818      0.522






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G      1.103      1.426      1.274         22        640: 100%|██████████| 25/25 [05:06<00:00, 12.25s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:21<00:00,  7.32s/it]

                   all         84        288       0.73      0.531      0.677      0.434






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G      1.115      1.406      1.278         31        640: 100%|██████████| 25/25 [05:04<00:00, 12.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:20<00:00,  6.70s/it]

                   all         84        288      0.757      0.594      0.712      0.385






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G      1.148      1.354      1.312         18        640: 100%|██████████| 25/25 [04:50<00:00, 11.64s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:24<00:00,  8.09s/it]

                   all         84        288      0.791      0.649       0.75      0.481






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G      1.069      1.231      1.242         17        640: 100%|██████████| 25/25 [04:50<00:00, 11.64s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:21<00:00,  7.15s/it]

                   all         84        288      0.864      0.788      0.872      0.589






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G      1.017      1.163      1.213         29        640: 100%|██████████| 25/25 [04:48<00:00, 11.54s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:20<00:00,  6.70s/it]

                   all         84        288      0.883      0.826      0.909      0.648






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G     0.9195      1.047      1.149         20        640: 100%|██████████| 25/25 [04:48<00:00, 11.54s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:20<00:00,  6.78s/it]

                   all         84        288      0.876      0.868      0.925      0.686






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G     0.9245      1.018      1.155         36        640: 100%|██████████| 25/25 [04:46<00:00, 11.45s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:19<00:00,  6.37s/it]

                   all         84        288      0.933      0.882      0.941      0.722






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G     0.8365     0.9426      1.108         33        640: 100%|██████████| 25/25 [04:43<00:00, 11.34s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:20<00:00,  6.86s/it]

                   all         84        288       0.93      0.896      0.958      0.763






10 epochs completed in 0.893 hours.
Optimizer stripped from runs/detect/train3/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train3/weights/best.pt, 6.2MB

Validating runs/detect/train3/weights/best.pt...
Ultralytics YOLOv8.2.74 🚀 Python-3.10.12 torch-2.3.1+cu121 CPU (Intel Xeon 2.20GHz)
Model summary (fused): 168 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:18<00:00,  6.10s/it]


                   all         84        288      0.929      0.896      0.958      0.762
Speed: 2.2ms preprocess, 187.6ms inference, 0.0ms loss, 2.0ms postprocess per image
Results saved to [1mruns/detect/train3[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7825477ebdf0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.048048, 

In [10]:
model('/content/drive/MyDrive/datasets/whole_image.jpg')  # predict on an image


image 1/1 /content/drive/MyDrive/datasets/whole_image.jpg: 640x640 3 persons, 8.0ms
Speed: 2.2ms preprocess, 8.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person'}
 obb: None
 orig_img: array([[[ 56, 150, 149],
         [ 59, 153, 152],
         [ 64, 158, 157],
         ...,
         [ 34,  84,  96],
         [ 35,  86, 102],
         [ 39,  90, 106]],
 
        [[ 63, 157, 156],
         [ 66, 160, 159],
         [ 70, 164, 163],
         ...,
         [ 22,  72,  84],
         [ 24,  73,  87],
         [ 25,  77,  90]],
 
        [[ 75, 172, 170],
         [ 77, 174, 172],
         [ 81, 178, 176],
         ...,
         [ 15,  67,  74],
         [ 18,  69,  79],
         [ 22,  73,  83]],
 
        ...,
 
        [[182, 214, 220],
         [179, 211, 217],
         [175, 206, 215],
         ...,
         [157, 196, 218],
         [156, 195, 217],
         [156, 195, 217]],
 
        [[182, 214, 220],
         [178, 210, 216],
         [173, 204, 213],
         ...,
         [151, 192

# PPE detection

## Mapping classes  in filtered annotation and yaml file

In [10]:
import os
import shutil

def filter_and_remap_annotations_and_images(source_images_dir, source_labels_dir, target_class_ids, class_id_mapping, output_images_dir, output_labels_dir):
    if not os.path.exists(output_images_dir):
        os.makedirs(output_images_dir)
    if not os.path.exists(output_labels_dir):
        os.makedirs(output_labels_dir)

    label_files = [f for f in os.listdir(source_labels_dir) if f.endswith('.txt')]

    for label_file in label_files:
        label_path = os.path.join(source_labels_dir, label_file)
        image_file = label_file.replace('.txt', '.jpg')  # Adjust extension if needed
        image_path = os.path.join(source_images_dir, image_file)

        with open(label_path, 'r') as file:
            lines = file.readlines()

        # Filter and remap lines based on target_class_ids
        filtered_lines = []
        for line in lines:
            parts = line.split()
            class_id = int(parts[0])
            if class_id in target_class_ids:
                new_class_id = class_id_mapping.get(class_id, class_id)
                filtered_lines.append(f"{new_class_id} {' '.join(parts[1:])}\n")

        # Debugging: Print the class IDs found
        print(f"Processing file: {label_file}")
        print(f"Filtered class IDs: {[int(line.split()[0]) for line in filtered_lines]}")

        if filtered_lines:
            if os.path.exists(image_path):
                shutil.copy(image_path, output_images_dir)

            new_label_path = os.path.join(output_labels_dir, label_file)
            with open(new_label_path, 'w') as file:
                file.writelines(filtered_lines)

def prepare_dataset(source_images_dir, source_labels_dir, target_class_ids, class_id_mapping, train_images_dir, train_labels_dir, val_images_dir, val_labels_dir):
    for directory in [train_images_dir, train_labels_dir, val_images_dir, val_labels_dir]:
        if not os.path.exists(directory):
            os.makedirs(directory)

    all_label_files = [f for f in os.listdir(source_labels_dir) if f.endswith('.txt')]
    import random
    random.shuffle(all_label_files)
    split_index = int(len(all_label_files) * 0.8)
    train_label_files = all_label_files[:split_index]
    val_label_files = all_label_files[split_index:]

    for label_files, img_output_dir, lbl_output_dir in [(train_label_files, train_images_dir, train_labels_dir), (val_label_files, val_images_dir, val_labels_dir)]:
        filter_and_remap_annotations_and_images(source_images_dir, source_labels_dir, target_class_ids, class_id_mapping, img_output_dir, lbl_output_dir)

# Directories and parameters
source_images_dir = '/content/drive/MyDrive/datasets/images'
source_labels_dir = '/content/drive/MyDrive/datasets/labels'

# Target class IDs in the annotation file
target_class_ids = [1, 2, 4, 5, 6]  # IDs to include from the annotation files

# Mapping from annotation file class IDs to YAML class IDs
class_id_mapping = {
    1: 0,  # Map class ID 1 to 0
    2: 1,  # Map class ID 2 to 1
    4: 2,  # Map class ID 4 to 2
    5: 3,  # Map class ID 5 to 3
    6: 4   # Map class ID 6 to 4
}

# Output directories for filtered data
train_images_dir = '/content/drive/MyDrive/datasets/person_Detection/train/images'
train_labels_dir = '/content/drive/MyDrive/datasets/person_Detection/train/labels'
val_images_dir = '/content/drive/MyDrive/datasets/person_Detection/val/images'
val_labels_dir = '/content/drive/MyDrive/datasets/person_Detection/val/labels'

# Prepare the dataset
prepare_dataset(source_images_dir, source_labels_dir, target_class_ids, class_id_mapping, train_images_dir, train_labels_dir, val_images_dir, val_labels_dir)


Processing file: 001045.txt
Filtered class IDs: [0, 1]
Processing file: -4216-_png_jpg.rf.881e17f72716e3cbdaa9d20cf9558142.txt
Filtered class IDs: [0, 0, 0, 3]
Processing file: -4100-_png_jpg.rf.aebfe87c2b4f556f03d14fc3cc6facf7.txt
Filtered class IDs: [0, 0, 0, 3, 3, 3]
Processing file: -2435-_png_jpg.rf.d88968da6353df51746244bb3619cc5a.txt
Filtered class IDs: [0, 0, 0, 3, 3]
Processing file: -2091-_png_jpg.rf.24a38225fa17a89f450e6fcf90584bb5.txt
Filtered class IDs: [0, 1, 1, 3, 4]
Processing file: -2168-_png_jpg.rf.cd5ce7cad7216bda1d5a2e90d9ccdd4e.txt
Filtered class IDs: [0, 0, 3]
Processing file: -2180-_png_jpg.rf.9d63bb305e7747d22fe9a196dcc5ce13.txt
Filtered class IDs: [0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3]
Processing file: -2391-_png_jpg.rf.8781d03c5c7efeeb7fdaeb65e1dd0fc7.txt
Filtered class IDs: [0, 1, 3]
Processing file: -2390-_png_jpg.rf.fa4cf091a0bc051c044e2505719d3971.txt
Filtered class IDs: []
Processing file: -2293-_png_jpg.rf.b1d581d625ae74bb60b5d56e7b562654.txt
Filtered class I

## Training yolo8 for PPE detection

In [13]:
# Load YOLOv8n-cls, train it on mnist160 for 10 epochs and predict an image with it
from ultralytics import YOLO
model = YOLO("/content/drive/MyDrive/datasets/PPE_Detection/Copy of yolov8n (1).pt")  # load a pretrained YOLOv8n classification model
model.train(data='/content/drive/MyDrive/datasets/PPE_Detection/data.yaml', epochs=10)



Ultralytics YOLOv8.2.74 🚀 Python-3.10.12 torch-2.3.1+cu121 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/content/drive/MyDrive/datasets/PPE_Detection/Copy of yolov8n (1).pt, data=/content/drive/MyDrive/datasets/PPE_Detection/data.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames

[34m[1mtrain: [0mScanning /content/drive/MyDrive/datasets/person_Detection/train/labels.cache... 409 images, 0 backgrounds, 0 corrupt: 100%|██████████| 409/409 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /content/drive/MyDrive/datasets/person_Detection/val/labels.cache... 397 images, 0 backgrounds, 0 corrupt: 100%|██████████| 397/397 [00:00<?, ?it/s]

Plotting labels to runs/detect/train/labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001111, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      1.431      3.582       1.18         94        640: 100%|██████████| 26/26 [05:38<00:00, 13.03s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:33<00:00,  7.23s/it]

                   all        397       2520     0.0187      0.386      0.139     0.0933






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      1.344      2.195      1.107         56        640: 100%|██████████| 26/26 [05:25<00:00, 12.53s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:36<00:00,  7.44s/it]

                   all        397       2520     0.0349       0.79      0.354      0.203






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G      1.341      1.934      1.102         45        640: 100%|██████████| 26/26 [05:37<00:00, 12.96s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:45<00:00,  8.11s/it]

                   all        397       2520      0.625      0.199      0.346      0.194






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G      1.307      1.807      1.092         56        640: 100%|██████████| 26/26 [05:27<00:00, 12.61s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:36<00:00,  7.43s/it]

                   all        397       2520      0.474      0.492       0.43       0.25






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G      1.273      1.675      1.085         52        640: 100%|██████████| 26/26 [05:11<00:00, 12.00s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:36<00:00,  7.41s/it]

                   all        397       2520      0.614       0.51      0.541      0.321






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G      1.225      1.552      1.065         58        640: 100%|██████████| 26/26 [05:00<00:00, 11.54s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:30<00:00,  6.95s/it]

                   all        397       2520      0.623      0.572      0.612      0.388






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G       1.21      1.485      1.042         44        640: 100%|██████████| 26/26 [05:02<00:00, 11.64s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:38<00:00,  7.61s/it]

                   all        397       2520      0.672      0.601      0.662      0.424






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G      1.198      1.444       1.04         64        640: 100%|██████████| 26/26 [05:42<00:00, 13.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:46<00:00,  8.17s/it]

                   all        397       2520      0.734      0.627      0.694      0.452






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G      1.134      1.336      1.016         51        640: 100%|██████████| 26/26 [05:55<00:00, 13.67s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:47<00:00,  8.26s/it]

                   all        397       2520      0.751      0.655      0.725      0.477






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G      1.119       1.31      1.016         91        640: 100%|██████████| 26/26 [05:51<00:00, 13.52s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:48<00:00,  8.33s/it]

                   all        397       2520      0.777      0.689      0.755      0.498






10 epochs completed in 1.197 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.2MB

Validating runs/detect/train/weights/best.pt...
Ultralytics YOLOv8.2.74 🚀 Python-3.10.12 torch-2.3.1+cu121 CPU (Intel Xeon 2.20GHz)
Model summary (fused): 168 layers, 3,006,623 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [01:33<00:00,  7.19s/it]


                   all        397       2520      0.777      0.688      0.755      0.498
              hard-hat        375        984      0.911      0.912      0.956      0.738
                gloves        197        447      0.642      0.443       0.51      0.324
                 boots        217        895        0.8      0.694      0.789      0.433
                  vest        104        194      0.757      0.705      0.766      0.496
Speed: 2.1ms preprocess, 197.8ms inference, 0.0ms loss, 10.9ms postprocess per image
Results saved to [1mruns/detect/train[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 3, 4])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x782550fde7d0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0

In [35]:
model('/content/drive/MyDrive/datasets/whole_image.jpg')  # predict on an image


image 1/1 /content/drive/MyDrive/datasets/whole_image.jpg: 640x640 3 hard-hats, 40.2ms
Speed: 2.4ms preprocess, 40.2ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 640)


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'hard-hat', 1: 'gloves', 2: 'glasses', 3: 'boots', 4: 'vest'}
 obb: None
 orig_img: array([[[ 56, 150, 149],
         [ 59, 153, 152],
         [ 64, 158, 157],
         ...,
         [ 34,  84,  96],
         [ 35,  86, 102],
         [ 39,  90, 106]],
 
        [[ 63, 157, 156],
         [ 66, 160, 159],
         [ 70, 164, 163],
         ...,
         [ 22,  72,  84],
         [ 24,  73,  87],
         [ 25,  77,  90]],
 
        [[ 75, 172, 170],
         [ 77, 174, 172],
         [ 81, 178, 176],
         ...,
         [ 15,  67,  74],
         [ 18,  69,  79],
         [ 22,  73,  83]],
 
        ...,
 
        [[182, 214, 220],
         [179, 211, 217],
         [175, 206, 215],
         ...,
         [157, 196, 218],
         [156, 195, 217],
         [156, 195, 217]],
 
        [[182, 214, 220],
         [178, 210, 216],
      

## inference.py code is below

In [55]:
import os
import cv2
from ultralytics import YOLO

def perform_inference(model, image_path, output_dir):
    results = model.predict(image_path)
    results = results[0]  # Get the first result
    result_path = os.path.join(output_dir, os.path.basename(image_path))
    results.save(result_path)
    return results

def crop_person_images(full_image_path, results, output_dir):
    image = cv2.imread(full_image_path)
    cropped_images = []
    if len(results.boxes) == 0:
        print(f"No detections found in {full_image_path}")
        return cropped_images  # Return empty list if no detections

    for bbox in results.boxes:
        bbox_array = bbox.xyxy.cpu().numpy()  # Convert tensor to numpy array
        conf_array = bbox.conf.cpu().numpy()  # Get confidence scores
        cls_array = bbox.cls.cpu().numpy()    # Get class IDs

        for i in range(len(bbox_array)):
            x1, y1, x2, y2 = map(int, bbox_array[i])
            conf, cls = conf_array[i], int(cls_array[i])
            if cls == 0:  # Assuming class 0 is the person class
                crop = image[y1:y2, x1:x2]
                crop_filename = f"{os.path.splitext(os.path.basename(full_image_path))[0]}_crop_{cls}.jpg"
                crop_path = os.path.join(output_dir, crop_filename)
                cv2.imwrite(crop_path, crop)
                cropped_images.append((crop_path, x1, y1, x2, y2))

    return cropped_images

def map_predictions_to_full_image(cropped_images, model, full_image_path, output_dir):
    full_image = cv2.imread(full_image_path)
    detections_found = False

    for crop_path, x1, y1, x2, y2 in cropped_images:
        results = perform_inference(model, crop_path, output_dir)
        if len(results.boxes) == 0:
            print(f"No detections found in cropped image {crop_path}")
            continue  # Skip if no detections

        detections_found = True
        for bbox in results.boxes:
            bbox_array = bbox.xyxy.cpu().numpy()
            conf_array = bbox.conf.cpu().numpy()
            cls_array = bbox.cls.cpu().numpy()

            for i in range(len(bbox_array)):
                cx1, cy1, cx2, cy2 = map(int, bbox_array[i])
                conf, cls = conf_array[i], int(cls_array[i])
                full_bbox = [cx1 + x1, cy1 + y1, cx2 + x1, cy2 + y1]
                cv2.rectangle(full_image, (full_bbox[0], full_bbox[1]), (full_bbox[2], full_bbox[3]), (0, 255, 0), 2)
                label = f"Class {cls}, Conf {conf:.2f}"
                cv2.putText(full_image, label, (full_bbox[0], full_bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    if detections_found:
        result_path = os.path.join(output_dir, os.path.basename(full_image_path))
        cv2.imwrite(result_path, full_image)
    else:
        print(f"No detections found for {full_image_path}, skipping save.")

def main(input_dir, output_dir, person_det_model, ppe_detection_model):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model1 = YOLO(person_det_model)
    model2 = YOLO(ppe_detection_model)

    for image_name in os.listdir(input_dir):
        if image_name.endswith('.jpg') or image_name.endswith('.png'):
            image_path = os.path.join(input_dir, image_name)

            results = perform_inference(model1, image_path, output_dir)

            cropped_images = crop_person_images(image_path, results, output_dir)

            map_predictions_to_full_image(cropped_images, model2, image_path, output_dir)

# Define parameters manually for Colab
input_dir = '/content/drive/MyDrive/datasets/images'
output_dir = '/content/runs/detect/results'
person_det_model = '/content/best-PD.pt'
ppe_detection_model = '/content/best-PPE.pt'

main(input_dir, output_dir, person_det_model, ppe_detection_model)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Speed: 3.9ms preprocess, 426.3ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /content/runs/detect/results/001216_jpg.rf.c7de195db643cb4d72f58f262b39b050_crop_0.jpg: 640x480 1 person, 296.9ms
Speed: 5.4ms preprocess, 296.9ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 /content/runs/detect/results/001216_jpg.rf.c7de195db643cb4d72f58f262b39b050_crop_0.jpg: 640x480 1 person, 273.5ms
Speed: 5.0ms preprocess, 273.5ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 /content/runs/detect/results/001216_jpg.rf.c7de195db643cb4d72f58f262b39b050_crop_0.jpg: 640x480 1 person, 276.5ms
Speed: 4.6ms preprocess, 276.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 /content/drive/MyDrive/datasets/images/006336_jpg.rf.4882fa277106be1378a906016ab8a711.jpg: 640x640 4 hard-hats, 2 glovess, 3 bootss, 373.6ms
Speed: 3.6ms pre