## **Format Dataset for YOLOv5**

In [1]:
import os
import shutil
import random
from tqdm import tqdm

# Root folder of the SH17 dataset as mounted in the Kaggle session
base_dir = "/kaggle/input/sh17-dataset-for-ppe-detection"

# Source folders holding the images and their label files
images_dir = os.path.join(base_dir, "images")
labels_dir = os.path.join(base_dir, "labels")

# Text files listing the image names of the train and validation sets
train_file = os.path.join(base_dir, "train_files.txt")
val_file = os.path.join(base_dir, "val_files.txt")

# Destination root for the re-organised copy of the dataset
output_dir = "/kaggle/working/data"
os.makedirs(output_dir, exist_ok=True)

# One images/<split> folder and one labels/<split> folder per split
train_images_dir, val_images_dir, test_images_dir = (
    os.path.join(output_dir, f"images/{split}") for split in ("train", "val", "test")
)
train_labels_dir, val_labels_dir, test_labels_dir = (
    os.path.join(output_dir, f"labels/{split}") for split in ("train", "val", "test")
)

# Make sure every destination folder exists before any file is copied
for dir_path in (train_images_dir, val_images_dir, test_images_dir,
                 train_labels_dir, val_labels_dir, test_labels_dir):
    os.makedirs(dir_path, exist_ok=True)

def copy_files(file_list, dest_images_dir, dest_labels_dir):
    """
    Copies image and corresponding label files to the specified directories.

    Images are read from the module-level ``images_dir`` and labels from the
    module-level ``labels_dir``. The label of an image is the file with the
    same base name and a ``.txt`` extension. Missing files are reported with a
    printed message and skipped; they do not stop the copy.

    Args:
        file_list (list): List of image file names to be copied. Surrounding
            whitespace (such as a trailing newline) is stripped and empty
            entries are ignored.
        dest_images_dir (str): Destination directory for images.
        dest_labels_dir (str): Destination directory for labels.
    """
    for file in tqdm(file_list):
        file = file.strip()

        # An empty entry would resolve to the images directory itself, which
        # "exists" and then makes shutil.copy fail.
        if not file:
            continue

        # Copy the image file
        image_path = os.path.join(images_dir, file)
        if os.path.exists(image_path):
            shutil.copy(image_path, dest_images_dir)
        else:
            print(f"Image not found: {image_path}")

        # Swap only the real extension for ".txt". A plain str.replace() on the
        # extension text would also rewrite any other occurrence of it inside
        # the file name.
        label_name = os.path.splitext(file)[0] + ".txt"
        label_path = os.path.join(labels_dir, label_name)
        if os.path.exists(label_path):
            shutil.copy(label_path, dest_labels_dir)
        else:
            print(f"Label not found: {label_path}")

# Fixed seed so the 80/20 split is the same on every run. The output folders
# persist between runs, so an unseeded re-run would add a different split on
# top of the old one and put the same image in both the train and val folders.
split_seed = 42

# Read the list of training files from train_files.txt, ignoring blank lines
with open(train_file, 'r') as f:
    train_files = [line.strip() for line in f if line.strip()]

# Shuffle deterministically and split into 80% training and 20% validation
random.Random(split_seed).shuffle(train_files)
split_idx = int(0.8 * len(train_files))
train_split = train_files[:split_idx]
val_split = train_files[split_idx:]

# Copy the training split files
print("Copying train files...")
copy_files(train_split, train_images_dir, train_labels_dir)

# Copy the validation split files
print("Copying val files...")
copy_files(val_split, val_images_dir, val_labels_dir)

# The files listed in val_files.txt are used as the test set
with open(val_file, 'r') as f:
    test_files = [line.strip() for line in f if line.strip()]

print("Copying test files...")
copy_files(test_files, test_images_dir, test_labels_dir)

print("Data prepared!")

Copying train files...


100%|██████████| 5183/5183 [03:30<00:00, 24.63it/s]


Copying val files...


100%|██████████| 1296/1296 [00:52<00:00, 24.83it/s]


Copying test files...


100%|██████████| 1620/1620 [01:05<00:00, 24.87it/s]

Data prepared!





## **Setup environment for YOLOv5**

In [2]:
# Fetch the YOLOv5 repository and make it the working directory, so the later
# cells can refer to requirements.txt, train.py and val.py by relative path.
# NOTE(review): the clone is not pinned to a tag or commit, so a re-run may pull
# a train.py that differs from the one the AMP patch cell was written against.
!git clone https://github.com/ultralytics/yolov5.git
%cd yolov5

Cloning into 'yolov5'...
remote: Enumerating objects: 17129, done.[K
remote: Counting objects: 100% (49/49), done.[K
remote: Compressing objects: 100% (39/39), done.[K
remote: Total 17129 (delta 32), reused 10 (delta 10), pack-reused 17080 (from 4)[K
Receiving objects: 100% (17129/17129), 15.84 MiB | 31.55 MiB/s, done.
Resolving deltas: 100% (11744/11744), done.
/kaggle/working/yolov5


In [3]:
# Install the dependencies listed in the cloned repository's requirements.txt
# (the working directory is yolov5 at this point), then comet_ml quietly.
# NOTE(review): comet_ml is installed without a version pin — confirm whether
# a specific version is needed for reproducible runs.
!pip install -r requirements.txt
!pip install -q comet_ml

Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.2.34 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.3.61-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics>=8.2.34->-r requirements.txt (line 18))
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Downloading ultralytics-8.3.61-py3-none-any.whl (906 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m906.9/906.9 kB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.3.61 ultralytics-thop-2.0.13
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m710.6/710.6 kB[0m [31

## **Create YAML file**

In [4]:
import yaml

# Class names of the dataset; presumably listed in the order of the class ids
# used in the label files — verify against the dataset documentation.
class_names = [
    "Person", "Ear", "Earmuffs", "Face", "Face-guard", "Face-mask-medical",
    "Foot", "Tools", "Glasses", "Gloves", "Helmet", "Hands", "Head",
    "Medical-suit", "Shoes", "Safety-suit", "Safety-vest"
]

# Dataset description: class names, class count and the image folder of each
# split. The count is taken from the list so the two cannot drift apart.
data = {
    "names": class_names,
    "nc": len(class_names),
    "test": "/kaggle/working/data/images/test",
    "train": "/kaggle/working/data/images/train",
    "val": "/kaggle/working/data/images/val"
}

# Relative to the current working directory (yolov5 after the earlier %cd),
# so the file is written one level above the repository.
yaml_file_path = "../yolo.yaml"

# Serialise the description in block style
with open(yaml_file_path, "w") as yaml_file:
    yaml.dump(data, yaml_file, default_flow_style=False)

print(f"YAML file has been created: {yaml_file_path}")

YAML file has been created: ../yolo.yaml


## **Update AMP Autocast Syntax in train.py**

In [5]:
# Define the file path for the script to be updated
file_path = "/kaggle/working/yolov5/train.py"

# Deprecated call and its replacement. The positional argument of
# torch.cuda.amp.autocast is the `enabled` flag, so it is forwarded as
# `enabled=amp`; dropping it would force autocast on even when AMP is disabled.
old_call = "with torch.cuda.amp.autocast(amp):"
new_call = "with torch.amp.autocast(device_type='cuda', enabled=amp):"

# Read the content of the file line by line
with open(file_path, "r") as file:
    lines = file.readlines()

# Count the lines that carry the deprecated call and build the patched content
replaced_count = sum(old_call in line for line in lines)
updated_lines = [line.replace(old_call, new_call) for line in lines]

if replaced_count:
    # Write the modified content back to the file
    with open(file_path, "w") as file:
        file.writelines(updated_lines)
    print("The deprecated `torch.cuda.amp.autocast` warnings in train.py have been updated!")
else:
    # Either the file was already patched or this train.py no longer contains
    # the deprecated call; leave it untouched and say so instead of claiming success.
    print(f"No `{old_call}` line found in {file_path}; nothing was changed.")




## **Train the YOLOv5 model**

In [6]:
# Train starting from the yolov5m.pt weights for 10 epochs at image size 640 and
# batch size 16, using the dataset description in ../yolo.yaml. Results are
# written under the --project folder; the evaluation cells read the weights
# from its `exp` subfolder.
# NOTE(review): a repeated training run presumably lands in a new run folder —
# confirm the weights path used by the evaluation cells after a re-run.
!python train.py --img 640 --epochs 10 --batch-size 16 --data ../yolo.yaml --weights yolov5m.pt --project /kaggle/working/results/yolov5/train

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
2025-01-14 00:33:28.680985: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-14 00:33:28.871572: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-14 00:33:28.925470: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mwandb[0m: Using wandb-core as the SDK 

## **Evaluate on validation set**

In [7]:
# Evaluate the best.pt checkpoint of the training run with val.py at image size
# 640 and batch size 16. No --task is given, so val.py uses its default task
# (reported as task=val in the log). Results go under the --project folder.
!python val.py --weights /kaggle/working/results/yolov5/train/exp/weights/best.pt \
               --data ../yolo.yaml \
               --img 640 \
               --batch-size 16 \
               --project /kaggle/working/results/yolov5/val

[34m[1mval: [0mdata=../yolo.yaml, weights=['/kaggle/working/results/yolov5/train/exp/weights/best.pt'], batch_size=16, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=/kaggle/working/results/yolov5/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v7.0-395-g6420a1db Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)

Fusing layers... 
Model summary: 212 layers, 20917590 parameters, 0 gradients, 48.1 GFLOPs
[34m[1mval: [0mScanning /kaggle/working/data/labels/val.cache... 1296 images, 0 background[0m
                 Class     Images  Instances          P          R      mAP50   
                   all       1296      12518      0.787      0.386      0.441      0.266
                Person       1296       2322      0.897       0.86        0.9      0.685
                   Ear 

## **Evaluate on test set**

In [8]:
# Evaluate the same best.pt checkpoint with val.py, this time with --task test
# so the `test` entry of ../yolo.yaml is used. Results go under the --project
# folder for the test run.
!python val.py --weights /kaggle/working/results/yolov5/train/exp/weights/best.pt \
               --data ../yolo.yaml \
               --img 640 \
               --batch-size 16 \
               --task test \
               --project /kaggle/working/results/yolov5/test

[34m[1mval: [0mdata=../yolo.yaml, weights=['/kaggle/working/results/yolov5/train/exp/weights/best.pt'], batch_size=16, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task=test, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=/kaggle/working/results/yolov5/test, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v7.0-395-g6420a1db Python-3.10.12 torch-2.4.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)

Fusing layers... 
Model summary: 212 layers, 20917590 parameters, 0 gradients, 48.1 GFLOPs
[34m[1mtest: [0mScanning /kaggle/working/data/labels/test... 1620 images, 0 backgrounds, 0[0m
[34m[1mtest: [0mNew cache created: /kaggle/working/data/labels/test.cache
                 Class     Images  Instances          P          R      mAP50   
                   all       1620      15358      0.795      0.426      0.466      0.282
                Person       1620

## **Zip result folder**

In [9]:
import zipfile
import os

def zip_folder(folder_path, output_zip_path):
    """
    Compress an entire folder into a .zip file.

    Every file below ``folder_path`` is stored under its path relative to
    ``folder_path``. Empty directories are not added. Problems are reported
    with a printed message instead of being raised, and nothing is returned.

    Args:
        folder_path (str): Path to the folder to be compressed.
        output_zip_path (str): Path to save the resulting zip file. It may lie
            inside ``folder_path``; the archive is never added to itself.
    """
    # os.walk() yields nothing for a missing path, which would otherwise leave
    # an empty archive behind and report success.
    if not os.path.isdir(folder_path):
        print(f"Error while compressing the folder: {folder_path} is not a directory")
        return

    # Resolved once so the archive can be recognised during the walk
    archive_real_path = os.path.realpath(output_zip_path)

    try:
        # Create a new zip file with write mode and compression
        with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, dirs, files in os.walk(folder_path):
                # Sorting in place fixes the traversal order, so the archive
                # layout does not depend on the file system's listing order.
                dirs.sort()
                for file in sorted(files):
                    full_path = os.path.join(root, file)
                    # Skip the archive that is currently being written
                    if os.path.realpath(full_path) == archive_real_path:
                        continue
                    # Store the file under its path relative to the folder
                    relative_path = os.path.relpath(full_path, folder_path)
                    zipf.write(full_path, relative_path)
        print(f"Folder {folder_path} has been compressed into {output_zip_path}")
    except Exception as e:
        # Deliberately best-effort: report the failure and carry on
        print(f"Error while compressing the folder: {e}")

# Pack the results folder (the --project target of the training and evaluation
# runs) into a single archive next to it
results_folder = "/kaggle/working/results"
output_zip = "/kaggle/working/results.zip"
zip_folder(folder_path=results_folder, output_zip_path=output_zip)

Folder /kaggle/working/results has been compressed into /kaggle/working/results.zip
