# Augmenting Data Pipeline with Albumentations

1. Load dependencies
2. Instantiate variables
3. Define transformations
4. Iterate over existing dataset and apply transformations
5. Save augmented data to labels/augmented and images/augmented

#### Load Dependencies

In [1]:
import os
import random
import sys

sys.path.append(os.path.abspath('../utils'))
import screen_info

import cv2

import albumentations as A

#### Instantiate Variables

In [2]:
random.seed(21)

# for displaying augmented images
window_width, window_height, window_x, window_y = screen_info.get_screen_info()

# directories
src_img_dir = "../data/images/processed"
src_label_dir = "../data/labels/formatted"
dest_img_dir = "../data/images/augmented"
dest_label_dir = "../data/labels/augmented"

# list of original images
orig_images = os.listdir(src_img_dir)

#### Define Transformations
Look on [Albumentations]('https://explore.albumentations.ai/') for various transformations available. Currently using:
- Horizontal flip (10% chance)
- Adjust brightness/contrast (50% chance)


In [3]:
# function for augmenting images (horizontal flip (10% chance), alter brightness/contrast (80% chance))
transform = A.Compose([
    A.HorizontalFlip(p=0.1),
    A.RandomBrightnessContrast(
        brightness_limit = [-0.2, 0.2],
        contrast_limit = [-0.2, 0.2],
        brightness_by_max = True,
        ensure_safe_range = False,
        p=1
    )
], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

#### Iterate over original dataset and apply transformations

In [4]:
for image in orig_images:

    # source paths for image and label
    src_label_path = os.path.join(src_label_dir, image.split('.')[0] + '.txt')
    src_img_path = os.path.join(src_img_dir, image)

    # load source img
    img = cv2.imread(src_img_path)

    # stores original bboxes and labels
    bboxes = []
    labels = []
    with open(src_label_path, 'r') as src_file:
        for line in src_file:
            keys = line.split(' ')
            if keys[0] == '\n':
                break
            bboxes.append([float(keys[1]), float(keys[2]), float(keys[3]), float(keys[4].replace('\n', ''))])
            labels.append(int(keys[0]))
    
    # Apply transformation on original image, preserve original annotations
    try: 
        transformed = transform(image=img, bboxes=bboxes, class_labels=labels)
        transformed_image = transformed['image']
        transformed_bboxes = transformed['bboxes']
        transformed_labels = transformed['class_labels']

        # save augmented image
        augmented_img_path = dest_img_dir + '/augmented_' + image
        cv2.imwrite(augmented_img_path, transformed_image)

        # save augmented labels
        augmented_label_path = dest_label_dir + '/augmented_' + image.split('.')[0] + '.txt'
        with open(augmented_label_path, 'w') as dest_file:
            for bbox, label in zip(transformed_bboxes, transformed_labels):
                rounded_bbox = [round(val, 6) for val in bbox]
                dest_file.write(str(int(label)) + " " + " ".join(map(str,rounded_bbox)) + '\n')
    except ValueError as e:
        print(f"Value Error with: {image}, {e}")


Value Error with: 20250222_152541M_000513.jpg, Expected y_min for bbox [ 0.4270835  -0.0412945   0.47135448  0.0412945   1.        ] to be in the range [0.0, 1.0], got -0.04129450023174286.
Value Error with: 20250222_152541M_002203.jpg, Expected x_min for bbox [-4.9965456e-07  5.1562548e-01  1.1718500e-02  5.6696451e-01
  2.0000000e+00] to be in the range [0.0, 1.0], got -4.996545612812042e-07.
Value Error with: 20250222_153041M_003116.jpg, Expected x_min for bbox [-4.9965456e-07  2.5669649e-01  1.3020501e-02  3.3705351e-01
  2.0000000e+00] to be in the range [0.0, 1.0], got -4.996545612812042e-07.
Value Error with: 20250222_153041M_004618.jpg, Expected y_min for bbox [ 5.0651050e-01 -4.9918890e-07  5.5729151e-01  1.3839251e-01
  1.0000000e+00] to be in the range [0.0, 1.0], got -4.991888999938965e-07.
Value Error with: 20250222_153041M_004620.jpg, Expected y_min for bbox [ 5.104165e-01 -4.991889e-07  5.572915e-01  1.026785e-01  1.000000e+00] to be in the range [0.0, 1.0], got -4.99188

In [6]:
import data_management
img_train_dir = '../data/model_data/images/train'
img_val_dir = '../data/model_data/images/validation'

label_train_dir = '../data/model_data/labels/train'
label_val_dir = '../data/model_data/labels/validation'
data_management.train_val_split('../data/images/processed','../data/labels/formatted', img_train_dir, label_train_dir, img_val_dir, label_val_dir)

In [1]:
from ultralytics import YOLO

#utils.train_val_split(dest_img_dir, dest_label_dir, '../data/model_data/images/train', '../data/model_data/labels/train', '../data/model_data/images/validation', '../data/model_data/labels/validation')

model = YOLO('yolov8n.yaml')

results = model.train(
        data = "../SLD.yaml", 
        epochs=30, 
        imgsz=768, 
        device=0, 
        project="../runs", 
        name="augmented_2025-03-15_14-21-01"
    )


New https://pypi.org/project/ultralytics/8.3.91 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.80  Python-3.10.11 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4070 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.yaml, data=../SLD.yaml, epochs=30, time=None, patience=100, batch=16, imgsz=768, save=True, save_period=-1, cache=False, device=0, workers=8, project=../runs, name=augmented_2025-03-15_14-21-012, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_m

[34m[1mtrain: [0mScanning C:\Users\colet\Documents\GitHub\Red-Light-Runners\data\model_data\labels\train.cache... 24058 images, 3257 backgrounds, 0 corrupt: 100%|██████████| 24058/24058 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning C:\Users\colet\Documents\GitHub\Red-Light-Runners\data\model_data\labels\validation.cache... 6015 images, 809 backgrounds, 0 corrupt: 100%|██████████| 6015/6015 [00:00<?, ?it/s]


Plotting labels to ..\runs\augmented_2025-03-15_14-21-012\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 768 train, 768 val
Using 8 dataloader workers
Logging results to [1m..\runs\augmented_2025-03-15_14-21-012[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/30      2.84G      3.033      23.41      1.684         54        768: 100%|██████████| 1504/1504 [05:30<00:00,  4.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:46<00:00,  4.03it/s]


                   all       6015      19844      0.859      0.414      0.483      0.229

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/30      2.94G        1.8      1.337     0.9498         57        768: 100%|██████████| 1504/1504 [05:31<00:00,  4.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:43<00:00,  4.37it/s]


                   all       6015      19844      0.953      0.561      0.652      0.397

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/30      2.95G      1.574     0.9823     0.9037         20        768: 100%|██████████| 1504/1504 [04:54<00:00,  5.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:39<00:00,  4.75it/s]


                   all       6015      19844       0.84      0.753      0.822       0.49

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/30      2.92G      1.396      0.805     0.8783         68        768: 100%|██████████| 1504/1504 [04:50<00:00,  5.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:36<00:00,  5.10it/s]

                   all       6015      19844      0.918      0.861      0.922      0.566






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/30      2.96G      1.255     0.6855     0.8601         55        768: 100%|██████████| 1504/1504 [06:08<00:00,  4.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:47<00:00,  3.95it/s]

                   all       6015      19844      0.946      0.906      0.964      0.627






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/30      2.94G      1.187     0.6297     0.8511         58        768: 100%|██████████| 1504/1504 [05:53<00:00,  4.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:45<00:00,  4.10it/s]

                   all       6015      19844      0.958      0.931      0.982      0.649






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/30      2.92G      1.134     0.5914     0.8438         51        768: 100%|██████████| 1504/1504 [05:18<00:00,  4.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:37<00:00,  5.00it/s]

                   all       6015      19844      0.942      0.947      0.985      0.654






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/30      2.95G      1.103      0.569     0.8398         35        768: 100%|██████████| 1504/1504 [04:51<00:00,  5.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:37<00:00,  5.07it/s]

                   all       6015      19844      0.953      0.967      0.989      0.669






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/30      2.92G      1.068     0.5443      0.836         44        768: 100%|██████████| 1504/1504 [04:56<00:00,  5.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:38<00:00,  4.92it/s]

                   all       6015      19844      0.949      0.966      0.989      0.676






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/30      2.95G       1.05      0.527     0.8327         53        768: 100%|██████████| 1504/1504 [04:51<00:00,  5.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:36<00:00,  5.21it/s]

                   all       6015      19844      0.976      0.944      0.987      0.688






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/30      2.95G      1.029     0.5151     0.8307         68        768: 100%|██████████| 1504/1504 [05:21<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:42<00:00,  4.38it/s]

                   all       6015      19844      0.976      0.975      0.991      0.699






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/30      2.96G      1.007     0.5003     0.8288         53        768: 100%|██████████| 1504/1504 [05:32<00:00,  4.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:42<00:00,  4.40it/s]

                   all       6015      19844      0.955      0.971       0.99      0.704






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/30      2.95G     0.9948     0.4885     0.8278         26        768: 100%|██████████| 1504/1504 [05:08<00:00,  4.88it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:40<00:00,  4.63it/s]

                   all       6015      19844      0.968      0.974      0.992      0.708






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/30      2.96G     0.9758     0.4796      0.825         83        768: 100%|██████████| 1504/1504 [04:53<00:00,  5.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:39<00:00,  4.76it/s]


                   all       6015      19844      0.974      0.974      0.992      0.718

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/30      2.92G     0.9609     0.4686     0.8229         60        768: 100%|██████████| 1504/1504 [04:52<00:00,  5.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:39<00:00,  4.73it/s]

                   all       6015      19844      0.969      0.979      0.992      0.718






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/30      2.92G     0.9507     0.4598     0.8226         55        768: 100%|██████████| 1504/1504 [04:53<00:00,  5.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:40<00:00,  4.64it/s]

                   all       6015      19844      0.975      0.976      0.992      0.729






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/30      2.94G     0.9377     0.4545     0.8217         57        768: 100%|██████████| 1504/1504 [04:47<00:00,  5.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:36<00:00,  5.11it/s]

                   all       6015      19844      0.979      0.981      0.993      0.732






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/30      2.92G      0.925     0.4455     0.8194         83        768: 100%|██████████| 1504/1504 [04:46<00:00,  5.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:37<00:00,  4.99it/s]

                   all       6015      19844      0.978      0.983      0.992      0.729






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/30      2.92G     0.9182     0.4403     0.8193         67        768: 100%|██████████| 1504/1504 [04:46<00:00,  5.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:36<00:00,  5.14it/s]

                   all       6015      19844      0.972      0.984      0.992      0.738






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/30      2.95G       0.91     0.4374     0.8186         42        768: 100%|██████████| 1504/1504 [04:47<00:00,  5.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:38<00:00,  4.93it/s]

                   all       6015      19844      0.976      0.984      0.993      0.735





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/30      2.89G     0.8547     0.4048     0.8173         19        768: 100%|██████████| 1504/1504 [05:05<00:00,  4.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:40<00:00,  4.70it/s]

                   all       6015      19844      0.973      0.981      0.993      0.735






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/30      2.89G     0.8392     0.3971     0.8159         31        768: 100%|██████████| 1504/1504 [04:51<00:00,  5.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:35<00:00,  5.23it/s]

                   all       6015      19844      0.975      0.983      0.993      0.739






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/30      2.89G     0.8253     0.3865     0.8155         41        768: 100%|██████████| 1504/1504 [04:43<00:00,  5.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:37<00:00,  5.01it/s]


                   all       6015      19844      0.973      0.987      0.992      0.743

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/30      2.89G     0.8178      0.383     0.8144         37        768: 100%|██████████| 1504/1504 [04:45<00:00,  5.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:37<00:00,  5.06it/s]


                   all       6015      19844      0.982      0.984      0.993       0.75

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/30      2.91G     0.8046     0.3746     0.8135         27        768: 100%|██████████| 1504/1504 [04:44<00:00,  5.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:36<00:00,  5.14it/s]

                   all       6015      19844      0.977      0.988      0.993      0.751






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/30      2.91G     0.7927     0.3683     0.8123         35        768: 100%|██████████| 1504/1504 [04:44<00:00,  5.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:39<00:00,  4.75it/s]

                   all       6015      19844      0.984      0.985      0.993      0.756






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/30      2.91G     0.7814     0.3619     0.8105         39        768: 100%|██████████| 1504/1504 [04:30<00:00,  5.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:29<00:00,  6.47it/s]

                   all       6015      19844       0.98      0.987      0.993      0.757






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/30      2.89G     0.7751     0.3578     0.8114         38        768: 100%|██████████| 1504/1504 [04:25<00:00,  5.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:35<00:00,  5.37it/s]

                   all       6015      19844       0.98      0.987      0.993      0.756






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/30      2.89G     0.7596     0.3509     0.8095         36        768: 100%|██████████| 1504/1504 [04:44<00:00,  5.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:37<00:00,  5.08it/s]

                   all       6015      19844      0.984      0.984      0.993      0.758






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/30      2.89G     0.7504     0.3446     0.8083         37        768: 100%|██████████| 1504/1504 [04:44<00:00,  5.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:37<00:00,  5.02it/s]

                   all       6015      19844      0.983      0.985      0.993      0.761






30 epochs completed in 2.840 hours.
Optimizer stripped from ..\runs\augmented_2025-03-15_14-21-012\weights\last.pt, 6.2MB
Optimizer stripped from ..\runs\augmented_2025-03-15_14-21-012\weights\best.pt, 6.2MB

Validating ..\runs\augmented_2025-03-15_14-21-012\weights\best.pt...
Ultralytics 8.3.80  Python-3.10.11 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4070 Laptop GPU, 8188MiB)
YOLOv8n summary (fused): 72 layers, 3,006,428 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 188/188 [00:40<00:00,  4.59it/s]


                   all       6015      19844      0.983      0.985      0.993       0.76
           green_light       2538       6070      0.982      0.984      0.993       0.77
             red_light       4632      13211      0.987      0.986      0.994      0.859
          yellow_light        317        563      0.981      0.985      0.992      0.652
Speed: 0.2ms preprocess, 1.4ms inference, 0.0ms loss, 1.1ms postprocess per image
Results saved to [1m..\runs\augmented_2025-03-15_14-21-012[0m
