# Augmenting Data Pipeline with Albumentations

1. Load dependencies
2. Instantiate variables
3. Define transformations
4. Iterate over existing dataset and apply transformations
5. Save augmented data to labels/augmented and images/augmented

#### Load Dependencies

In [1]:
import os
import random
import sys

sys.path.append(os.path.abspath('../src'))
import utils

import cv2

import albumentations as A

#### Instantiate Variables

In [2]:
random.seed(21)

# for displaying augmented images
window_width, window_height, window_x, window_y = utils.get_screen_info()

# directories
src_img_dir = "../data/images/processed"
src_label_dir = "../data/labels/formatted"
dest_img_dir = "../data/images/augmented"
dest_label_dir = "../data/labels/augmented"

# list of original images
orig_images = os.listdir(src_img_dir)

#### Define Transformations
Look on [Albumentations]('https://explore.albumentations.ai/') for various transformations available. Currently using:
- Horizontal flip (10% chance)
- Adjust brightness/contrast (50% chance)


In [3]:
# function for augmenting images (horizontal flip (10% chance), alter brightness/contrast (80% chance))
transform = A.Compose([
    A.HorizontalFlip(p=0.1),
    A.RandomBrightnessContrast(
        brightness_limit = [-0.2, 0.2],
        contrast_limit = [-0.2, 0.2],
        brightness_by_max = True,
        ensure_safe_range = False,
        p=0.5
    )
], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

#### Iterate over original dataset and apply transformations

In [5]:
for image in orig_images:

    # source paths for image and label
    src_label_path = os.path.join(src_label_dir, image.split('.')[0] + '.txt')
    src_img_path = os.path.join(src_img_dir, image)

    # load source img
    img = cv2.imread(src_img_path)

    # stores original bboxes and labels
    bboxes = []
    labels = []
    with open(src_label_path, 'r') as src_file:
        for line in src_file:
            keys = line.split(' ')
            if keys[0] == '\n':
                break
            bboxes.append([float(keys[1]), float(keys[2]), float(keys[3]), float(keys[4].replace('\n', ''))])
            labels.append(int(keys[0]))
    
    # Apply transformation on original image, preserve original annotations
    try: 
        transformed = transform(image=img, bboxes=bboxes, class_labels=labels)
        transformed_image = transformed['image']
        transformed_bboxes = transformed['bboxes']
        transformed_labels = transformed['class_labels']

        # save augmented image
        augmented_img_path = dest_img_dir + '/augmented_' + image
        cv2.imwrite(augmented_img_path, transformed_image)

        # save augmented labels
        augmented_label_path = dest_label_dir + '/augmented_' + image.split('.')[0] + '.txt'
        with open(augmented_label_path, 'w') as dest_file:
            for bbox, label in zip(transformed_bboxes, transformed_labels):
                rounded_bbox = [round(val, 6) for val in bbox]
                dest_file.write(str(int(label)) + " " + " ".join(map(str,rounded_bbox)) + '\n')
    except ValueError as e:
        print(f"Value Error with: {image}, {e}")


Value Error with: 20250222_152541M_000513.jpg, Expected y_min for bbox [    0.42708   -0.041295     0.47135    0.041295           1] to be in the range [0.0, 1.0], got -0.04129450023174286.
Value Error with: 20250222_152541M_002203.jpg, Expected x_min for bbox [-4.9965e-07     0.51563    0.011719     0.56696           2] to be in the range [0.0, 1.0], got -4.996545612812042e-07.
Value Error with: 20250222_153041M_003116.jpg, Expected x_min for bbox [-4.9965e-07      0.2567    0.013021     0.33705           2] to be in the range [0.0, 1.0], got -4.996545612812042e-07.
Value Error with: 20250222_153041M_004618.jpg, Expected y_min for bbox [    0.50651 -4.9919e-07     0.55729     0.13839           1] to be in the range [0.0, 1.0], got -4.991888999938965e-07.
Value Error with: 20250222_153041M_004620.jpg, Expected y_min for bbox [    0.51042 -4.9919e-07     0.55729     0.10268           1] to be in the range [0.0, 1.0], got -4.991888999938965e-07.


In [7]:
from ultralytics import YOLO

#utils.train_val_split(dest_img_dir, dest_label_dir, '../data/model_data/images/train', '../data/model_data/labels/train', '../data/model_data/images/validation', '../data/model_data/labels/validation')

model = YOLO('yolov8n.yaml')

results = model.train(
        data = "../SLD.yaml", 
        epochs=30, 
        imgsz=768, 
        device=0, 
        project="../runs", 
        name="augmented_2025-03-14_21-00-21"
    )


New https://pypi.org/project/ultralytics/8.3.90 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.80  Python-3.10.11 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4070 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.yaml, data=../SLD.yaml, epochs=30, time=None, patience=100, batch=16, imgsz=768, save=True, save_period=-1, cache=False, device=0, workers=8, project=../runs, name=augmented_2025-03-14_21-00-21, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_ma

[34m[1mtrain: [0mScanning C:\Users\colet\Documents\GitHub\Red-Light-Runners\data\model_data\labels\train... 12026 images, 1615 backgrounds, 0 corrupt: 100%|██████████| 12026/12026 [00:05<00:00, 2145.63it/s]


[34m[1mtrain: [0mNew cache created: C:\Users\colet\Documents\GitHub\Red-Light-Runners\data\model_data\labels\train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning C:\Users\colet\Documents\GitHub\Red-Light-Runners\data\model_data\labels\validation... 3007 images, 418 backgrounds, 0 corrupt: 100%|██████████| 3007/3007 [00:01<00:00, 1678.57it/s]


[34m[1mval: [0mNew cache created: C:\Users\colet\Documents\GitHub\Red-Light-Runners\data\model_data\labels\validation.cache
Plotting labels to ..\runs\augmented_2025-03-14_21-00-21\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.00125, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 768 train, 768 val
Using 8 dataloader workers
Logging results to [1m..\runs\augmented_2025-03-14_21-00-21[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/30      2.86G      3.106      24.21      1.661         60        768: 100%|██████████| 752/752 [02:31<00:00,  4.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.30it/s]


                   all       3007       9808      0.678      0.339       0.36      0.187

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/30      2.96G      1.833       2.01     0.9687         59        768: 100%|██████████| 752/752 [02:39<00:00,  4.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.32it/s]


                   all       3007       9808      0.896      0.485      0.557      0.341

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/30      2.87G      1.566      1.239     0.9192         60        768: 100%|██████████| 752/752 [02:42<00:00,  4.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.35it/s]

                   all       3007       9808      0.696      0.637      0.692      0.425






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/30      2.98G      1.452     0.9847     0.8969         61        768: 100%|██████████| 752/752 [02:44<00:00,  4.57it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:22<00:00,  4.20it/s]

                   all       3007       9808      0.766      0.712      0.775      0.467






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/30      2.97G      1.353     0.8514     0.8813         65        768: 100%|██████████| 752/752 [02:47<00:00,  4.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:22<00:00,  4.12it/s]

                   all       3007       9808       0.89      0.784      0.857      0.521






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/30      2.96G      1.292      0.769     0.8697         81        768: 100%|██████████| 752/752 [02:44<00:00,  4.57it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:22<00:00,  4.19it/s]

                   all       3007       9808      0.908      0.836      0.907      0.574






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/30      2.97G      1.237     0.7209     0.8637         61        768: 100%|██████████| 752/752 [02:45<00:00,  4.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.37it/s]

                   all       3007       9808      0.939      0.838      0.913      0.593






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/30      2.96G      1.211      0.688     0.8565         35        768: 100%|██████████| 752/752 [02:45<00:00,  4.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.29it/s]

                   all       3007       9808      0.915      0.867      0.933      0.597






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/30      2.94G      1.171     0.6445     0.8525         66        768: 100%|██████████| 752/752 [02:44<00:00,  4.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:22<00:00,  4.23it/s]

                   all       3007       9808      0.936      0.882      0.951      0.611






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/30      2.96G      1.153     0.6219     0.8505         43        768: 100%|██████████| 752/752 [02:47<00:00,  4.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.41it/s]

                   all       3007       9808      0.925      0.877       0.95      0.621






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/30      2.94G      1.133     0.6079     0.8466         61        768: 100%|██████████| 752/752 [02:47<00:00,  4.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:23<00:00,  4.03it/s]

                   all       3007       9808      0.924      0.906      0.962      0.637






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/30      2.97G      1.117     0.5904      0.841         41        768: 100%|██████████| 752/752 [02:45<00:00,  4.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.38it/s]

                   all       3007       9808      0.953       0.92      0.975      0.651






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/30      2.97G      1.096      0.571     0.8398         45        768: 100%|██████████| 752/752 [02:42<00:00,  4.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.29it/s]

                   all       3007       9808      0.963      0.919      0.971      0.658






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/30      2.97G      1.084     0.5603     0.8396         64        768: 100%|██████████| 752/752 [02:38<00:00,  4.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.29it/s]

                   all       3007       9808      0.947      0.934      0.972      0.657






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/30      2.96G      1.064     0.5538     0.8368         70        768: 100%|██████████| 752/752 [02:49<00:00,  4.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.28it/s]


                   all       3007       9808      0.949      0.943      0.979      0.667

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/30      2.97G      1.063     0.5477     0.8359         36        768: 100%|██████████| 752/752 [02:39<00:00,  4.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:22<00:00,  4.23it/s]

                   all       3007       9808      0.959      0.946       0.98      0.672






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/30      2.96G      1.041     0.5347     0.8336         52        768: 100%|██████████| 752/752 [02:38<00:00,  4.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.42it/s]

                   all       3007       9808      0.955      0.951      0.984      0.679






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/30      2.96G      1.027     0.5157     0.8321         74        768: 100%|██████████| 752/752 [02:42<00:00,  4.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.31it/s]

                   all       3007       9808      0.961      0.934      0.981      0.685






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/30      2.96G      1.014     0.5082     0.8296         64        768: 100%|██████████| 752/752 [02:38<00:00,  4.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.30it/s]

                   all       3007       9808      0.959      0.951      0.986      0.691






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/30      2.93G      1.002     0.4986     0.8279         39        768: 100%|██████████| 752/752 [02:39<00:00,  4.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.46it/s]

                   all       3007       9808       0.96       0.96      0.987      0.702





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/30      2.92G     0.9565     0.4681     0.8295         32        768: 100%|██████████| 752/752 [02:20<00:00,  5.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:22<00:00,  4.09it/s]

                   all       3007       9808      0.967      0.958      0.987      0.696






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/30      2.92G      0.939     0.4544     0.8266         29        768: 100%|██████████| 752/752 [02:40<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.32it/s]

                   all       3007       9808      0.966      0.948      0.984      0.699






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/30      2.92G     0.9221     0.4461     0.8285         28        768: 100%|██████████| 752/752 [02:32<00:00,  4.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.40it/s]

                   all       3007       9808      0.968      0.969      0.988      0.706






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/30      2.92G     0.9087     0.4347     0.8265         27        768: 100%|██████████| 752/752 [02:32<00:00,  4.94it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.28it/s]


                   all       3007       9808      0.969      0.971      0.987      0.704

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/30      2.92G     0.8916     0.4293     0.8222         33        768: 100%|██████████| 752/752 [02:33<00:00,  4.91it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.32it/s]

                   all       3007       9808      0.963      0.959      0.982      0.705






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/30       2.9G     0.8839     0.4202     0.8214         28        768: 100%|██████████| 752/752 [02:31<00:00,  4.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.35it/s]

                   all       3007       9808      0.964      0.974      0.988      0.716






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/30      2.91G     0.8689     0.4182     0.8198         29        768: 100%|██████████| 752/752 [02:31<00:00,  4.98it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.43it/s]

                   all       3007       9808      0.972      0.968      0.987      0.715






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/30      2.91G     0.8584     0.4083     0.8191         30        768: 100%|██████████| 752/752 [02:32<00:00,  4.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:20<00:00,  4.54it/s]

                   all       3007       9808      0.975      0.973       0.99      0.724






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/30       2.9G     0.8462     0.4003     0.8189         34        768: 100%|██████████| 752/752 [02:30<00:00,  4.99it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.44it/s]

                   all       3007       9808      0.969      0.976      0.989      0.728






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/30      2.91G     0.8385     0.3963      0.817         22        768: 100%|██████████| 752/752 [02:30<00:00,  4.99it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:21<00:00,  4.35it/s]


                   all       3007       9808      0.971      0.971      0.989      0.729

30 epochs completed in 1.525 hours.
Optimizer stripped from ..\runs\augmented_2025-03-14_21-00-21\weights\last.pt, 6.2MB
Optimizer stripped from ..\runs\augmented_2025-03-14_21-00-21\weights\best.pt, 6.2MB

Validating ..\runs\augmented_2025-03-14_21-00-21\weights\best.pt...
Ultralytics 8.3.80  Python-3.10.11 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4070 Laptop GPU, 8188MiB)
YOLOv8n summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 94/94 [00:23<00:00,  3.98it/s]


                   all       3007       9808      0.971      0.971      0.989      0.729
           green_light       1247       2972      0.969      0.976      0.991      0.737
             red_light       2306       6568      0.979       0.98      0.994      0.825
          yellow_light        149        268      0.966      0.956      0.983      0.624
Speed: 0.2ms preprocess, 1.4ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to [1m..\runs\augmented_2025-03-14_21-00-21[0m
