# Train YOLO Model for Human Detection

In [5]:
import os
import torch
import shutil
from ultralytics import YOLO

In [6]:
# Report CUDA availability and select the torch device used by this notebook
has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")
if not has_cuda:
    # No GPU visible to torch: fall back to the CPU device
    device = torch.device("cpu")
    print("Using CPU")
else:
    # Show the name of GPU index 0 before selecting the CUDA device
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    device = torch.device("cuda")

CUDA available: True
GPU: NVIDIA GeForce RTX 4060 Laptop GPU


In [7]:
# Project file paths, all derived from the current working directory.
# The notebook is expected to run from the "notebooks" folder, one level
# below the project root.
notebook_dir = os.getcwd()
project_root = os.path.split(notebook_dir)[0]  # parent of the notebooks folder

# Top-level project folders
data_dir = os.path.join(project_root, "data")
model_dir = os.path.join(project_root, "models")

# Dataset sub-folders living under the data directory
image_dir, label_dir, temp_label_dir = (
    os.path.join(data_dir, folder_name)
    for folder_name in ("images", "labels", "temp_labels")
)

print(f"Project root: {project_root}")
print(f"Data directory: {data_dir}")
print(f"Model directory: {model_dir}")
print(f"Temporary labels directory: {temp_label_dir}")

Project root: C:\Users\jrom\DataspellProjects\Drowning-Detection
Data directory: C:\Users\jrom\DataspellProjects\Drowning-Detection\data
Model directory: C:\Users\jrom\DataspellProjects\Drowning-Detection\models
Temporary labels directory: C:\Users\jrom\DataspellProjects\Drowning-Detection\data\temp_labels


## 1. Prepare data for YOLO training

In [8]:
# Create temporary labels directory for unified class
def _write_single_class_labels(input_path, output_path):
    """Copy one label file, replacing the class id of every row with 0.

    Rows with fewer than five whitespace-separated fields are dropped,
    all remaining fields after the class id are kept unchanged.
    """
    with open(input_path, 'r') as infile, open(output_path, 'w') as outfile:
        for line in infile:
            parts = line.strip().split()
            if len(parts) >= 5:
                # Change class id to 0, keep other values the same
                outfile.write(f"0 {' '.join(parts[1:])}\n")


def create_temp_labels(splits=("train", "val")):
    """Build a single-class copy of the label files under ``temp_label_dir``.

    For every split, each ``.txt`` file found in ``label_dir/<split>`` is
    rewritten to ``temp_label_dir/<split>`` with all class ids set to 0.
    Files without a ``.txt`` suffix are ignored.

    Args:
        splits: Names of the sub-folders of ``label_dir`` to process.
            Defaults to the train and validation splits.

    Raises:
        FileNotFoundError: If a split folder is missing under ``label_dir``
            (raised by ``os.listdir``).
    """
    # Create every output folder up front, before any file is processed
    for split in splits:
        os.makedirs(os.path.join(temp_label_dir, split), exist_ok=True)

    # The same conversion applies to each split, so loop instead of duplicating it
    for split in splits:
        source_split_dir = os.path.join(label_dir, split)
        target_split_dir = os.path.join(temp_label_dir, split)
        for label_file in os.listdir(source_split_dir):
            if label_file.endswith(".txt"):
                _write_single_class_labels(
                    os.path.join(source_split_dir, label_file),
                    os.path.join(target_split_dir, label_file),
                )

    print(f"Created temporary labels with unified class at {temp_label_dir}")

# Temporarily swap label folders
def swap_labels_for_training():
    """Replace ``label_dir`` with a copy of the temporary single-class labels.

    The original labels are moved to ``data_dir/labels_backup``. If that
    backup already exists, an earlier swap was never restored and
    ``label_dir`` currently holds the temporary labels; in that case the
    backup is kept untouched (it is the only copy of the original labels)
    and only ``label_dir`` is refreshed from ``temp_label_dir``.

    Returns:
        Path of the backup folder holding the original labels.

    Raises:
        FileNotFoundError: If ``temp_label_dir`` does not exist. This is
            checked before anything is moved so the originals stay in place.
    """
    backup_labels_dir = os.path.join(data_dir, "labels_backup")

    # Fail before touching the originals if there is nothing to swap in
    if not os.path.isdir(temp_label_dir):
        raise FileNotFoundError(
            f"Temporary labels directory not found: {temp_label_dir}"
        )

    if os.path.exists(backup_labels_dir):
        # Already swapped: never overwrite the backup of the original labels
        if os.path.exists(label_dir):
            shutil.rmtree(label_dir)
    else:
        shutil.move(label_dir, backup_labels_dir)

    shutil.copytree(temp_label_dir, label_dir)
    print("Swapped original labels with temporary labels for training.")
    return backup_labels_dir

# Restore original label folder after training
def restore_labels(backup_labels_dir):
    """Move the backed-up original labels back to ``label_dir``.

    Args:
        backup_labels_dir: Folder holding the original labels, as returned
            by ``swap_labels_for_training``.

    Raises:
        FileNotFoundError: If ``backup_labels_dir`` does not exist. The check
            runs before ``label_dir`` is removed, so a missing backup never
            causes the current labels to be deleted.
    """
    if not os.path.isdir(backup_labels_dir):
        raise FileNotFoundError(
            f"Label backup not found, nothing to restore: {backup_labels_dir}"
        )

    # Drop the swapped-in labels, then put the originals back in their place
    if os.path.exists(label_dir):
        shutil.rmtree(label_dir)
    shutil.move(backup_labels_dir, label_dir)
    print("Restored original labels after training.")

def create_data_yaml():
    """Write the single-class dataset config file and return its path.

    The file is written to ``data_dir/human_detection_data.yaml``. It sets
    ``data_dir`` as the dataset path, ``images/train`` and ``images/val`` as
    the train and val entries, and one class, ``human``, with id 0.
    """
    yaml_path = os.path.join(data_dir, "human_detection_data.yaml")
    config_lines = [
        f"path: {data_dir}",
        "train: images/train",
        "val: images/val",
        "names:",
        "  0: human",
    ]
    # No leading or trailing newline is written
    with open(yaml_path, "w") as config_file:
        config_file.write("\n".join(config_lines))
    print(f"Created data configuration at {yaml_path}")
    return yaml_path

# Create temporary labels and the data.yaml file
# Order matters: create_temp_labels() must fill temp_label_dir before
# swap_labels_for_training() copies that folder over label_dir.
create_temp_labels()
data_yaml = create_data_yaml()
# NOTE: the swap moves the original labels to data/labels_backup; they stay
# there until restore_labels(backup_labels_path) is called.
backup_labels_path = swap_labels_for_training()  # Swap labels before training

Created temporary labels with unified class at C:\Users\jrom\DataspellProjects\Drowning-Detection\data\temp_labels
Created data configuration at C:\Users\jrom\DataspellProjects\Drowning-Detection\data\human_detection_data.yaml
Swapped original labels with temporary labels for training.


## 2. Train YOLO11 Model

In [9]:
def train_yolo_model(data_yaml, epochs=50, imgsz=640, batch_size=16):
    """Train a model loaded from ``yolo11s.pt`` on the given dataset config.

    Args:
        data_yaml: Path to the dataset YAML passed to ``model.train``.
        epochs: Number of training epochs.
        imgsz: Image size passed to ``model.train``.
        batch_size: Batch size, passed to ``model.train`` as ``batch``.

    Returns:
        Tuple ``(model, results)``: the YOLO model object and the value
        returned by ``model.train``.

    The run is written under ``model_dir/human_detection``, and the model is
    also saved to ``model_dir/human_detection_yolo11s.pt`` via ``model.save``.
    """
    print("Loading YOLO11s model...")
    model = YOLO("yolo11s.pt")

    # Data augmentation settings, forwarded unchanged to model.train
    augmentation = {
        "hsv_v": 0.4,      # Value (brightness) augmentation
        "degrees": 10.0,   # Rotation augmentation
        "fliplr": 0.5,     # Horizontal flip with 50% probability
        "scale": 0.5,      # Random scaling
        "translate": 0.1,  # Translation augmentation
    }

    print(f"Training model for {epochs} epochs...")
    # Use GPU index 0 when CUDA is available, otherwise train on the CPU
    run_device = 0 if torch.cuda.is_available() else 'cpu'
    results = model.train(
        data=data_yaml,
        epochs=epochs,
        imgsz=imgsz,
        batch=batch_size,
        device=run_device,
        project=model_dir,
        name="human_detection",
        exist_ok=True,
        **augmentation,
    )

    # Save the trained model
    model_path = os.path.join(model_dir, "human_detection_yolo11s.pt")
    model.save(model_path)
    print(f"Model saved to {model_path}")

    return model, results

## 3. Validate Model

In [10]:
def validate_model(data_yaml, batch_size=4):
    """Validate the best checkpoint of the ``human_detection`` run.

    Loads ``model_dir/human_detection/weights/best.pt``, runs ``model.val``
    on the dataset described by ``data_yaml`` and prints the box metrics.

    Args:
        data_yaml: Path to the dataset YAML passed to ``model.val``.
        batch_size: Batch size, passed to ``model.val`` as ``batch``.

    Returns:
        The results object returned by ``model.val``.
    """
    print("Loading best model for validation...")
    model = YOLO(os.path.join(model_dir, "human_detection", "weights", "best.pt"))  # Load best model

    print("Validating model...")
    results = model.val(
        data=data_yaml,
        batch=batch_size,
        device=0 if torch.cuda.is_available() else 'cpu'
    )

    # Print metrics
    # map50/map/mp/mr are the library's scalar summaries. Calling .mean() on
    # them (or on the per-class p/r containers) can fail when no statistics
    # were collected, so read the scalar properties directly.
    metrics = results.box
    print("\nValidation Results:")
    print(f"mAP50: {float(metrics.map50):.4f}")
    print(f"mAP50-95: {float(metrics.map):.4f}")
    print(f"Precision: {float(metrics.mp):.4f}")
    print(f"Recall: {float(metrics.mr):.4f}")

    return results

## 4. Run all steps

In [11]:
# Run training for 20 epochs (overriding the function's default of 50) and keep
# both the model object and the training results for later cells.
model, training_results = train_yolo_model(data_yaml, epochs=20)

Loading YOLO11s model...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt to 'yolo11s.pt'...


100%|██████████| 18.4M/18.4M [00:04<00:00, 4.76MB/s]


Training model for 20 epochs...
New https://pypi.org/project/ultralytics/8.3.111 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.79  Python-3.12.3 torch-2.6.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolo11s.pt, data=C:\Users\jrom\DataspellProjects\Drowning-Detection\data\human_detection_data.yaml, epochs=20, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=8, project=C:\Users\jrom\DataspellProjects\Drowning-Detection\models, name=human_detection, exist_ok=True, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, 

100%|██████████| 5.35M/5.35M [00:00<00:00, 20.2MB/s]


[34m[1mAMP: [0mchecks passed 


[34m[1mtrain: [0mScanning C:\Users\jrom\DataspellProjects\Drowning-Detection\data\labels\train... 7000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7000/7000 [00:02<00:00, 2488.41it/s]


[34m[1mtrain: [0mNew cache created: C:\Users\jrom\DataspellProjects\Drowning-Detection\data\labels\train.cache


[34m[1mval: [0mScanning C:\Users\jrom\DataspellProjects\Drowning-Detection\data\labels\val... 1572 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1572/1572 [00:01<00:00, 838.71it/s]


[34m[1mval: [0mNew cache created: C:\Users\jrom\DataspellProjects\Drowning-Detection\data\labels\val.cache
Plotting labels to C:\Users\jrom\DataspellProjects\Drowning-Detection\models\human_detection\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mC:\Users\jrom\DataspellProjects\Drowning-Detection\models\human_detection[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20      4.05G      1.492      1.398      1.233         20        640: 100%|██████████| 438/438 [01:53<00:00,  3.87it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:14<00:00,  3.41it/s]


                   all       1572       2317      0.908      0.923      0.963      0.615

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20      4.06G      1.421     0.8074      1.195         21        640: 100%|██████████| 438/438 [01:47<00:00,  4.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.62it/s]


                   all       1572       2317       0.55      0.785      0.524      0.307

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      4.06G      1.384     0.7648       1.17         23        640: 100%|██████████| 438/438 [01:44<00:00,  4.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.80it/s]


                   all       1572       2317       0.94      0.976      0.987      0.663

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      4.03G      1.339     0.7086      1.153          9        640: 100%|██████████| 438/438 [01:43<00:00,  4.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.83it/s]

                   all       1572       2317      0.977      0.981      0.989      0.613






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      4.05G      1.308     0.6742      1.135         25        640: 100%|██████████| 438/438 [01:43<00:00,  4.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:12<00:00,  3.86it/s]

                   all       1572       2317      0.981      0.984      0.993      0.666






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20      4.04G      1.272     0.6395      1.112         24        640: 100%|██████████| 438/438 [01:43<00:00,  4.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:12<00:00,  3.89it/s]

                   all       1572       2317      0.985      0.984      0.993      0.714






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      4.06G      1.238     0.6165        1.1         19        640: 100%|██████████| 438/438 [01:43<00:00,  4.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.80it/s]

                   all       1572       2317      0.985      0.985      0.993      0.647






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      4.04G       1.22     0.5927      1.089         29        640: 100%|██████████| 438/438 [01:43<00:00,  4.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.78it/s]

                   all       1572       2317      0.981      0.987      0.994      0.725






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20      4.05G      1.193     0.5758      1.078         27        640: 100%|██████████| 438/438 [01:43<00:00,  4.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:12<00:00,  3.87it/s]

                   all       1572       2317      0.988      0.995      0.994      0.731






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      4.05G      1.177     0.5554       1.07         17        640: 100%|██████████| 438/438 [01:43<00:00,  4.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.78it/s]

                   all       1572       2317      0.983      0.994      0.993       0.72





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      4.07G      1.137     0.5145      1.069         12        640: 100%|██████████| 438/438 [01:43<00:00,  4.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:12<00:00,  3.87it/s]

                   all       1572       2317      0.984      0.992      0.993      0.691






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      4.04G      1.123     0.5082      1.061          8        640: 100%|██████████| 438/438 [01:43<00:00,  4.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:12<00:00,  3.87it/s]

                   all       1572       2317       0.99      0.994      0.993      0.669






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      4.05G      1.108     0.4966      1.052         11        640: 100%|██████████| 438/438 [01:41<00:00,  4.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:12<00:00,  3.86it/s]

                   all       1572       2317      0.988      0.995      0.994       0.72






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      4.05G      1.098     0.4871      1.041         11        640: 100%|██████████| 438/438 [01:41<00:00,  4.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:12<00:00,  3.87it/s]

                   all       1572       2317      0.988      0.997      0.994      0.739






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      4.06G      1.073     0.4742      1.033         14        640: 100%|██████████| 438/438 [01:42<00:00,  4.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.85it/s]

                   all       1572       2317      0.989      0.997      0.994      0.739






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      4.05G      1.059     0.4584      1.029         11        640: 100%|██████████| 438/438 [01:42<00:00,  4.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.84it/s]

                   all       1572       2317      0.993      0.996      0.994      0.715






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      4.06G      1.032     0.4423      1.015         12        640: 100%|██████████| 438/438 [01:42<00:00,  4.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.77it/s]

                   all       1572       2317      0.995      0.997      0.994      0.755






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      4.05G      1.021     0.4324      1.008          8        640: 100%|██████████| 438/438 [01:42<00:00,  4.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.81it/s]

                   all       1572       2317      0.991      0.997      0.994      0.778






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      4.06G     0.9981     0.4214     0.9995         11        640: 100%|██████████| 438/438 [01:42<00:00,  4.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.83it/s]

                   all       1572       2317      0.991      0.998      0.994      0.777






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20      4.05G     0.9912     0.4102      0.996         13        640: 100%|██████████| 438/438 [01:41<00:00,  4.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.83it/s]

                   all       1572       2317      0.994      0.997      0.994      0.777






20 epochs completed in 0.666 hours.
Optimizer stripped from C:\Users\jrom\DataspellProjects\Drowning-Detection\models\human_detection\weights\last.pt, 19.2MB
Optimizer stripped from C:\Users\jrom\DataspellProjects\Drowning-Detection\models\human_detection\weights\best.pt, 19.2MB

Validating C:\Users\jrom\DataspellProjects\Drowning-Detection\models\human_detection\weights\best.pt...
Ultralytics 8.3.79  Python-3.12.3 torch-2.6.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLO11s summary (fused): 100 layers, 9,413,187 parameters, 0 gradients, 21.3 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:13<00:00,  3.67it/s]


                   all       1572       2317      0.991      0.997      0.994      0.778
Speed: 0.2ms preprocess, 2.1ms inference, 0.0ms loss, 1.6ms postprocess per image
Results saved to [1mC:\Users\jrom\DataspellProjects\Drowning-Detection\models\human_detection[0m
Model saved to C:\Users\jrom\DataspellProjects\Drowning-Detection\models\human_detection_yolo11s.pt


In [None]:
# Validate while the unified-class labels are still swapped in, then always put
# the original labels back (even if validation raises) so the dataset is not
# left in its swapped state once the notebook finishes.
try:
    validation_results = validate_model(data_yaml)
finally:
    # Only restore when a backup is actually present; otherwise there is
    # nothing to move back and the current labels must be left alone.
    if os.path.isdir(backup_labels_path):
        restore_labels(backup_labels_path)