In [19]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Looking in indexes: https://download.pytorch.org/whl/cu121
Note: you may need to restart the kernel to use updated packages.


In [20]:
import cv2
import numpy as np
import albumentations as A
from albumentations import Compose, HorizontalFlip, RandomRotate90, GaussianBlur, Normalize
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os.path
from PIL import Image, ImageDraw
from sahi.utils.file import load_json, save_json
from tqdm import tqdm
import timm
import random

import torchvision
import torch


In [21]:
torch.cuda.is_available()
torch.cuda.current_device()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [22]:
coco_file_name  = "cassette1_train"

# Determine target split from coco_file_name
target_split = None
if "train" in coco_file_name.lower():
    target_split = "train"
elif "test" in coco_file_name.lower():
    target_split = "test"
elif "val" in coco_file_name.lower():
    target_split = "val"
if not target_split:
    raise ValueError("Unable to determine target split from coco_file_name.")

print(target_split)

DATA_DIR = os.path.join("..", "data")
COCO_DIR = os.path.join(DATA_DIR, "coco")

ORG_ANNOTATION_PATH = os.path.join(DATA_DIR, "coco",target_split, f"{coco_file_name}_corrected_coco.json")
SLC_ANNOTATION_PATH = os.path.join(DATA_DIR, "coco",target_split, f"{coco_file_name}_sliced_coco.json")

AUGMENTATION_PATH = os.path.join(COCO_DIR, "augmentated") # Folder for all augmentations ./data/coco/augmentated

print(ORG_ANNOTATION_PATH)

#IMAGE_DIR = os.path.join(COCO_DIR, "images")
IMAGE_DIR = os.path.join(COCO_DIR, "images")
NEW_IMAGE_DIR = os.path.join(COCO_DIR, "images", "png") #for saving new original images (untouched)
SLICED_IMAGE_DIR = os.path.join(COCO_DIR, "images_sliced",coco_file_name)

BBOX_VISUALIZATION_DIR = os.path.join(DATA_DIR, "bbox_vis", coco_file_name)
BBOX_SAVE_DIR = os.path.join(BBOX_VISUALIZATION_DIR,"each") #Place to savea each of the before augmented bounding boxes

os.path.exists(DATA_DIR)
os.path.exists(COCO_DIR)

os.path.exists(ORG_ANNOTATION_PATH)
os.path.exists(IMAGE_DIR)
os.path.exists(BBOX_VISUALIZATION_DIR)

os.makedirs(BBOX_SAVE_DIR, exist_ok=True)
os.makedirs(NEW_IMAGE_DIR, exist_ok=True)
os.makedirs(AUGMENTATION_PATH,exist_ok=True)

train
..\data\coco\train\cassette1_train_corrected_coco.json


In [23]:
coco_dict = load_json(ORG_ANNOTATION_PATH)
[img.update({"file_name": img["file_name"].split("/")[-1]}) for img in coco_dict["images"]]
save_json(coco_dict, save_path=ORG_ANNOTATION_PATH)

coco_dict

{'images': [{'width': 4096,
   'height': 2000,
   'id': 1,
   'file_name': '01BE02.bmp'},
  {'width': 4096, 'height': 2000, 'id': 2, 'file_name': '01BE03.bmp'},
  {'width': 4096, 'height': 2000, 'id': 5, 'file_name': '01BN03.bmp'},
  {'width': 4096, 'height': 2000, 'id': 6, 'file_name': '01BS00.bmp'},
  {'width': 4096, 'height': 2000, 'id': 9, 'file_name': '01BS03.bmp'},
  {'width': 4096, 'height': 2000, 'id': 10, 'file_name': '01BW00.bmp'},
  {'width': 4096, 'height': 2000, 'id': 11, 'file_name': '01BW01.bmp'},
  {'width': 4096, 'height': 2000, 'id': 12, 'file_name': '01BW02.bmp'},
  {'width': 4096, 'height': 2000, 'id': 13, 'file_name': '01FE00.bmp'},
  {'width': 4096, 'height': 2000, 'id': 14, 'file_name': '01FE01.bmp'},
  {'width': 4096, 'height': 2000, 'id': 15, 'file_name': '01FE02.bmp'},
  {'width': 4096, 'height': 2000, 'id': 16, 'file_name': '01FN00.bmp'},
  {'width': 4096, 'height': 2000, 'id': 17, 'file_name': '01FN01.bmp'},
  {'width': 4096, 'height': 2000, 'id': 18, 'file_

In [24]:
# Process images with a progress bar
for img in tqdm(coco_dict["images"], desc="Processing original images"):
    # Open and convert the image to grayscale
    mono_img = Image.open(os.path.join(IMAGE_DIR, img["file_name"])).convert("L")
    
    # Save the grayscale image as .png
    png_file_name = img["file_name"].replace(".bmp", ".png")
    mono_img.save(os.path.join(NEW_IMAGE_DIR, png_file_name), format="PNG")
    
    # Convert grayscale image to RGB for bounding box visualization
    rgb_img = Image.merge("RGB", (mono_img, mono_img, mono_img))

    # Visualize bounding boxes on the RGB image
    for ann in coco_dict["annotations"]:
        if ann["image_id"] == img["id"]:
            xywh = ann["bbox"]
            xyxy = [xywh[0], xywh[1], xywh[0] + xywh[2], xywh[1] + xywh[3]]
            ImageDraw.Draw(rgb_img).rectangle(xyxy, width=5, outline="lime")

    # Display and save the image with bounding boxes as .png
    fig, ax = plt.subplots(1, 1, figsize=(12, 9), constrained_layout=True)
    ax.axis("off")
    ax.imshow(rgb_img)
    fig.savefig(os.path.join(BBOX_SAVE_DIR, png_file_name))
    plt.close(fig)

Processing original images: 100%|██████████| 18/18 [00:29<00:00,  1.61s/it]


In [25]:
slc_dict= load_json(SLC_ANNOTATION_PATH)
print(SLC_ANNOTATION_PATH)

slc_dict

..\data\coco\train\cassette1_train_sliced_coco.json


{'images': [{'height': 640,
   'width': 640,
   'id': 1,
   'file_name': '01BE02_0_0_0_640_640.png'},
  {'height': 640,
   'width': 640,
   'id': 2,
   'file_name': '01BE02_0_512_0_1152_640.png'},
  {'height': 640,
   'width': 640,
   'id': 3,
   'file_name': '01BE02_0_1024_0_1664_640.png'},
  {'height': 640,
   'width': 640,
   'id': 4,
   'file_name': '01BE02_0_1536_0_2176_640.png'},
  {'height': 640,
   'width': 640,
   'id': 5,
   'file_name': '01BE02_0_2048_0_2688_640.png'},
  {'height': 640,
   'width': 640,
   'id': 6,
   'file_name': '01BE02_0_2560_0_3200_640.png'},
  {'height': 640,
   'width': 640,
   'id': 7,
   'file_name': '01BE02_0_3072_0_3712_640.png'},
  {'height': 640,
   'width': 640,
   'id': 8,
   'file_name': '01BE02_0_3456_0_4096_640.png'},
  {'height': 640,
   'width': 640,
   'id': 9,
   'file_name': '01BE02_0_0_512_640_1152.png'},
  {'height': 640,
   'width': 640,
   'id': 10,
   'file_name': '01BE02_0_512_512_1152_1152.png'},
  {'height': 640,
   'width': 640

In [26]:
for img in tqdm(slc_dict["images"], desc="Processing sliced images"):
    fig, ax = plt.subplots(1, 1, figsize=(12, 9), constrained_layout=True)
    
    # Open the sliced image file in grayscale, convert it to RGB
    sliced_img_path = os.path.join(SLICED_IMAGE_DIR, img["file_name"])
    mono_img = Image.open(sliced_img_path).convert("L")
    rgb_img = Image.merge("RGB", (mono_img, mono_img, mono_img))
    
    # Iterate over all annotations for this specific image
    for annotation in slc_dict["annotations"]:
        if annotation["image_id"] == img["id"]:
            # Extract and convert bounding box coordinates
            xywh = annotation["bbox"]
            xyxy = [xywh[0], xywh[1], xywh[0] + xywh[2], xywh[1] + xywh[3]]
            
            # Draw the bounding box on the image
            ImageDraw.Draw(rgb_img).rectangle(xyxy, width=5, outline="lime")
    
    # Display and save the image with bounding boxes
    ax.axis("off")
    ax.imshow(rgb_img)
    fig.savefig(os.path.join(BBOX_SAVE_DIR, img["file_name"][:-4] + ".png"))  # Save each image to the specified folder
    plt.close(fig)

Processing sliced images: 100%|██████████| 576/576 [01:55<00:00,  4.99it/s]


In [27]:
splits = ["train", "test", "val"]

# Create 'original' and 'sliced' directories within each split
for split in splits:
    base_path = os.path.join(AUGMENTATION_PATH, split)
    os.makedirs(base_path, exist_ok=True)
    
    # Create 'original' and 'sliced' subdirectories within each split
    os.makedirs(os.path.join(base_path, "original"), exist_ok=True)
    os.makedirs(os.path.join(base_path, "sliced"), exist_ok=True)

## spatial AUGMENTATION

### Horizontal Flip

In [18]:
def horizontal_flip(
    image_dir,
    coco_annotations,
    output_dir,
    aug_type="horizontal_flip",
    dataset_type="original",
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_pixel_value=255.0
):
    # Set directories based on dataset_type
    AUG_SAVE_DIR = os.path.join(AUGMENTATION_PATH, target_split, dataset_type, aug_type, "each")
    AUG_VIEW_DIR = os.path.join(AUGMENTATION_PATH, target_split, dataset_type, aug_type)

    os.makedirs(AUG_SAVE_DIR, exist_ok=True)
    os.makedirs(AUG_VIEW_DIR, exist_ok=True)
    
    # Augmentation pipeline with normalization
    augmentation_pipeline = A.Compose([
        A.HorizontalFlip(p=1.0), # P is probability
        A.Normalize(mean=mean, std=std, max_pixel_value=max_pixel_value)
    ], bbox_params=A.BboxParams(format="coco", label_fields=["class_labels"]))

    for img in tqdm(coco_annotations["images"], desc=f"Processing {dataset_type} images"):
        file_name = img["file_name"].replace(".bmp", ".png")  # Replace .bmp with .png
        image_id = img["id"]

        # Load and convert the image to grayscale
        image_path = os.path.join(image_dir, file_name)
        mono_img = Image.open(image_path).convert("L")
        image_np = np.array(mono_img)

        # Collect bounding boxes and class labels for the image
        bboxes = [ann["bbox"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]
        class_labels = [ann["category_id"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]

        # Apply augmentation, including class_labels
        augmented = augmentation_pipeline(image=image_np, bboxes=bboxes, class_labels=class_labels)
        
        # Rescale augmented image back to 0–255 for viewing and saving
        augmented_image = ((augmented["image"] * np.array(std[0]) + mean[0]) * max_pixel_value).clip(0, 255).astype("uint8")
        Image.fromarray(augmented_image).save(os.path.join(AUG_SAVE_DIR, f"{aug_type}_{file_name}"))

        # Convert the augmented image to RGB for bounding box visualization
        rgb_img = Image.merge("RGB", (Image.fromarray(augmented_image),) * 3)
        draw = ImageDraw.Draw(rgb_img)
        
        # Draw bounding boxes on the RGB image
        for bbox in augmented["bboxes"]:
            xyxy = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
            draw.rectangle(xyxy, outline="lime", width=5)
        
        # Save the image with bounding boxes drawn
        rgb_img.save(os.path.join(AUG_VIEW_DIR, f"{aug_type}_{file_name}"))

    # Save the updated COCO JSON annotations
    augmented_json_path = os.path.join(output_dir, f"{coco_file_name}_{aug_type}.json")
    save_json(coco_annotations, augmented_json_path)
    print(f"Augmented annotations saved to {augmented_json_path}")

# Run the function for both original and sliced cases
horizontal_flip(NEW_IMAGE_DIR, coco_dict, os.path.join(AUGMENTATION_PATH, target_split, "original", "horizontal_flip"), dataset_type="original")
horizontal_flip(SLICED_IMAGE_DIR, slc_dict, os.path.join(AUGMENTATION_PATH, target_split, "sliced", "horizontal_flip"), dataset_type="sliced")

Processing original images: 100%|██████████| 18/18 [00:41<00:00,  2.29s/it]


Augmented annotations saved to ..\data\coco\augmentated\train\original\horizontal_flip\cassette1_train_horizontal_flip.json


Processing sliced images: 100%|██████████| 576/576 [01:12<00:00,  7.93it/s]

Augmented annotations saved to ..\data\coco\augmentated\train\sliced\horizontal_flip\cassette1_train_horizontal_flip.json





### Vertical Flip

In [28]:
def vertical_flip(
    image_dir,
    coco_annotations,
    output_dir,
    aug_type="vertical_flip",
    dataset_type="original",
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_pixel_value=255.0
):
    # Set directories based on dataset_type
    AUG_SAVE_DIR = os.path.join(AUGMENTATION_PATH, target_split, dataset_type, aug_type, "each")
    AUG_VIEW_DIR = os.path.join(AUGMENTATION_PATH, target_split, dataset_type, aug_type)

    os.makedirs(AUG_SAVE_DIR, exist_ok=True)
    os.makedirs(AUG_VIEW_DIR, exist_ok=True)
    
    # Augmentation pipeline with vertical flip and normalization
    augmentation_pipeline = A.Compose([
        A.VerticalFlip(p=1.0),
        A.Normalize(mean=mean, std=std, max_pixel_value=max_pixel_value)
    ], bbox_params=A.BboxParams(format="coco", label_fields=["class_labels"]))

    for img in tqdm(coco_annotations["images"], desc=f"Processing {dataset_type} images"):
        file_name = img["file_name"].replace(".bmp", ".png")  # Replace .bmp with .png
        image_id = img["id"]

        # Load and convert the image to grayscale
        image_path = os.path.join(image_dir, file_name)
        mono_img = Image.open(image_path).convert("L")
        image_np = np.array(mono_img)

        # Collect bounding boxes and class labels for the image
        bboxes = [ann["bbox"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]
        class_labels = [ann["category_id"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]

        # Apply augmentation, including class_labels
        augmented = augmentation_pipeline(image=image_np, bboxes=bboxes, class_labels=class_labels)
        
        # Rescale augmented image back to 0–255 for viewing and saving
        augmented_image = ((augmented["image"] * np.array(std[0]) + mean[0]) * max_pixel_value).clip(0, 255).astype("uint8")
        Image.fromarray(augmented_image).save(os.path.join(AUG_SAVE_DIR, f"{aug_type}_{file_name}"))

        # Convert the augmented image to RGB for bounding box visualization
        rgb_img = Image.merge("RGB", (Image.fromarray(augmented_image),) * 3)
        draw = ImageDraw.Draw(rgb_img)
        
        # Draw bounding boxes on the RGB image
        for bbox in augmented["bboxes"]:
            xyxy = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
            draw.rectangle(xyxy, outline="lime", width=5)
        
        # Save the image with bounding boxes drawn
        rgb_img.save(os.path.join(AUG_VIEW_DIR, f"{aug_type}_{file_name}"))

    # Save the updated COCO JSON annotations
    augmented_json_path = os.path.join(output_dir, f"{coco_file_name}_{aug_type}.json")
    save_json(coco_annotations, augmented_json_path)
    print(f"Augmented annotations saved to {augmented_json_path}")

# Run the function for both original and sliced cases
vertical_flip(NEW_IMAGE_DIR, coco_dict, os.path.join(AUGMENTATION_PATH, target_split, "original", "vertical_flip"), dataset_type="original")
vertical_flip(SLICED_IMAGE_DIR, slc_dict, os.path.join(AUGMENTATION_PATH, target_split, "sliced", "vertical_flip"), dataset_type="sliced")

Processing original images: 100%|██████████| 18/18 [00:40<00:00,  2.22s/it]


Augmented annotations saved to ..\data\coco\augmentated\train\original\vertical_flip\cassette1_train_vertical_flip.json


Processing sliced images: 100%|██████████| 576/576 [00:57<00:00, 10.07it/s]

Augmented annotations saved to ..\data\coco\augmentated\train\sliced\vertical_flip\cassette1_train_vertical_flip.json





In [29]:
import random

def random_flip(
    image_dir,
    coco_annotations,
    output_dir,
    dataset_type="original",
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_pixel_value=255.0
):
    # Set a fixed aug_type to ensure output is saved in the "random_flip" folder
    aug_type = "random_flip"

    # Randomly choose between horizontal and vertical flip
    if random.choice([True, False]):
        print("Applying horizontal flip")
        horizontal_flip(
            image_dir=image_dir,
            coco_annotations=coco_annotations,
            output_dir=output_dir,
            aug_type=aug_type,  # Use "random_flip" as aug_type
            dataset_type=dataset_type,
            mean=mean,
            std=std,
            max_pixel_value=max_pixel_value
        )
    else:
        print("Applying vertical flip")
        vertical_flip(
            image_dir=image_dir,
            coco_annotations=coco_annotations,
            output_dir=output_dir,
            aug_type=aug_type,  # Use "random_flip" as aug_type
            dataset_type=dataset_type,
            mean=mean,
            std=std,
            max_pixel_value=max_pixel_value
        )

# Run the random_flip function for both original and sliced cases
random_flip(NEW_IMAGE_DIR, coco_dict, os.path.join(AUGMENTATION_PATH, target_split, "original", "random_flip"), dataset_type="original")
random_flip(SLICED_IMAGE_DIR, slc_dict, os.path.join(AUGMENTATION_PATH, target_split, "sliced", "random_flip"), dataset_type="sliced")


Applying vertical flip


Processing original images: 100%|██████████| 18/18 [00:48<00:00,  2.67s/it]


Augmented annotations saved to ..\data\coco\augmentated\train\original\random_flip\cassette1_train_random_flip.json
Applying horizontal flip


Processing sliced images: 100%|██████████| 576/576 [01:00<00:00,  9.50it/s]

Augmented annotations saved to ..\data\coco\augmentated\train\sliced\random_flip\cassette1_train_random_flip.json





### Safe Rotate

In [30]:
def safe_rotate( #Needs Work
    image_dir,
    coco_annotations,
    output_dir,
    limit=(-90, 90),  # Range for random rotation angle
    interpolation=1,  # Default is cv2.INTER_LINEAR
    border_mode=4,  # Default is cv2.BORDER_REFLECT_101
    rotate_method="largest_box",  # How to handle bounding boxes
    aug_type="safe_rotate",
    dataset_type="original",
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_pixel_value=255.0
):
    # Set directories based on dataset_type
    AUG_SAVE_DIR = os.path.join(AUGMENTATION_PATH, target_split, dataset_type, aug_type, "each")
    AUG_VIEW_DIR = os.path.join(AUGMENTATION_PATH, target_split, dataset_type, aug_type)

    os.makedirs(AUG_SAVE_DIR, exist_ok=True)
    os.makedirs(AUG_VIEW_DIR, exist_ok=True)
    
    # Augmentation pipeline with SafeRotate and normalization
    augmentation_pipeline = A.Compose([
        A.SafeRotate(
            limit=limit,  # Random rotation within specified range
            interpolation=interpolation,
            border_mode=border_mode,
            rotate_method=rotate_method,
            p=1.0
        ),
        A.Normalize(mean=mean, std=std, max_pixel_value=max_pixel_value)
    ], bbox_params=A.BboxParams(format="coco", label_fields=["class_labels"]))

    # Initialize a new list to store augmented annotations
    new_annotations = []

    for img in tqdm(coco_annotations["images"], desc=f"Processing {dataset_type} images"):
        file_name = img["file_name"].replace(".bmp", ".png")  # Replace .bmp with .png
        image_id = img["id"]

        # Load and convert the image to grayscale
        image_path = os.path.join(image_dir, file_name)
        mono_img = Image.open(image_path).convert("L")
        image_np = np.array(mono_img)

        # Collect bounding boxes and class labels for the image
        bboxes = [ann["bbox"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]
        class_labels = [ann["category_id"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]

        # Apply augmentation, including class_labels
        augmented = augmentation_pipeline(image=image_np, bboxes=bboxes, class_labels=class_labels)
        
        # Rescale augmented image back to 0–255 for viewing and saving
        augmented_image = ((augmented["image"] * np.array(std[0]) + mean[0]) * max_pixel_value).clip(0, 255).astype("uint8")
        Image.fromarray(augmented_image).save(os.path.join(AUG_SAVE_DIR, f"{aug_type}_{file_name}"))

        # Convert the augmented image to RGB for bounding box visualization
        rgb_img = Image.merge("RGB", (Image.fromarray(augmented_image),) * 3)
        draw = ImageDraw.Draw(rgb_img)

        # Update new annotations with augmented bounding boxes directly
        for bbox, label in zip(augmented["bboxes"], class_labels):
            # Visualize the bounding box
            xyxy = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
            draw.rectangle(xyxy, outline="lime", width=5)
            
            # Save the updated annotation
            new_annotations.append({
                "image_id": image_id,
                "category_id": label,
                "bbox": bbox,
                "area": bbox[2] * bbox[3],
                "iscrowd": 0,
                "id": len(new_annotations) + 1  # Unique ID for each annotation
            })

        # Save the image with bounding boxes drawn
        rgb_img.save(os.path.join(AUG_VIEW_DIR, f"{aug_type}_{file_name}"))

    # Replace the original annotations with the new, augmented ones
    coco_annotations["annotations"] = new_annotations

    # Save the updated COCO JSON annotations
    augmented_json_path = os.path.join(output_dir, f"{coco_file_name}_{aug_type}.json")
    save_json(coco_annotations, augmented_json_path)
    print(f"Augmented annotations saved to {augmented_json_path}")

# Run the safe_rotate function for both original and sliced cases
safe_rotate(NEW_IMAGE_DIR, coco_dict, os.path.join(AUGMENTATION_PATH, target_split, "original", "safe_rotate"), dataset_type="original")
safe_rotate(SLICED_IMAGE_DIR, slc_dict, os.path.join(AUGMENTATION_PATH, target_split, "sliced", "safe_rotate"), dataset_type="sliced")

# Run the safe_rotate function for both original and sliced cases
safe_rotate(NEW_IMAGE_DIR, coco_dict, os.path.join(AUGMENTATION_PATH, target_split, "original", "safe_rotate"), dataset_type="original")
safe_rotate(SLICED_IMAGE_DIR, slc_dict, os.path.join(AUGMENTATION_PATH, target_split, "sliced", "safe_rotate"), dataset_type="sliced")

Processing original images: 100%|██████████| 18/18 [00:57<00:00,  3.22s/it]


Augmented annotations saved to ..\data\coco\augmentated\train\original\safe_rotate\cassette1_train_safe_rotate.json


Processing sliced images: 100%|██████████| 576/576 [01:14<00:00,  7.73it/s]


Augmented annotations saved to ..\data\coco\augmentated\train\sliced\safe_rotate\cassette1_train_safe_rotate.json


Processing original images: 100%|██████████| 18/18 [00:48<00:00,  2.70s/it]


Augmented annotations saved to ..\data\coco\augmentated\train\original\safe_rotate\cassette1_train_safe_rotate.json


Processing sliced images: 100%|██████████| 576/576 [01:06<00:00,  8.65it/s]

Augmented annotations saved to ..\data\coco\augmentated\train\sliced\safe_rotate\cassette1_train_safe_rotate.json





### Optical Augmentation

In [31]:
def optical_distortion(
    image_dir,
    coco_annotations,
    output_dir,
    dataset_type="original",
    distort_limit=0.5,
    shift_limit=0.05,
    interpolation=cv2.INTER_LINEAR,
    border_mode=cv2.BORDER_REFLECT_101,
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_pixel_value=255.0
):
    # Set a fixed aug_type to ensure output is saved in the "optical_distortion" folder
    aug_type = "optical_distortion"

    # Create directories for saving augmented images and annotations
    AUG_SAVE_DIR = os.path.join(output_dir, dataset_type, aug_type, "each")
    os.makedirs(AUG_SAVE_DIR, exist_ok=True)

    # Define the augmentation pipeline with OpticalDistortion and normalization
    transform = A.Compose([
        A.OpticalDistortion(
            distort_limit=distort_limit,
            shift_limit=shift_limit,
            interpolation=interpolation,
            border_mode=border_mode,
            p=1.0
        ),
        A.Normalize(mean=mean, std=std, max_pixel_value=max_pixel_value)
    ], bbox_params=A.BboxParams(format="coco", label_fields=["class_labels"]))

    # Process each image in coco_annotations
    new_annotations = []
    for img in tqdm(coco_annotations["images"], desc=f"Processing {dataset_type} images"):
        file_name = img["file_name"].replace(".bmp", ".png")
        image_id = img["id"]

        # Load the image
        image_path = os.path.join(image_dir, file_name)
        image = cv2.imread(image_path)

        # Collect bounding boxes and class labels
        bboxes = [ann["bbox"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]
        class_labels = [ann["category_id"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]

        # Apply the transformation
        transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels)

        # Save the augmented image
        augmented_image = (transformed["image"] * max_pixel_value).clip(0, 255).astype("uint8")
        augmented_image_path = os.path.join(AUG_SAVE_DIR, f"{aug_type}_{file_name}")
        cv2.imwrite(augmented_image_path, augmented_image)

        # Update the new annotations with transformed bounding boxes
        for bbox, label in zip(transformed["bboxes"], class_labels):
            new_annotations.append({
                "image_id": image_id,
                "category_id": label,
                "bbox": bbox,
                "area": bbox[2] * bbox[3],
                "iscrowd": 0,
                "id": len(new_annotations) + 1
            })

    # Update the COCO annotations with augmented bounding boxes
    coco_annotations["annotations"] = new_annotations

    # Save the updated COCO JSON annotations
    augmented_json_path = os.path.join(output_dir, f"{coco_file_name}_{aug_type}.json")
    save_json(coco_annotations, augmented_json_path)
    print(f"Augmented annotations saved to {augmented_json_path}")

# Run the function for both original and sliced cases
horizontal_flip(NEW_IMAGE_DIR, coco_dict, os.path.join(AUGMENTATION_PATH, target_split, "original", "optical"), dataset_type="original")
horizontal_flip(SLICED_IMAGE_DIR, slc_dict, os.path.join(AUGMENTATION_PATH, target_split, "sliced", "optical"), dataset_type="sliced")

Processing original images: 100%|██████████| 18/18 [00:40<00:00,  2.23s/it]


Augmented annotations saved to ..\data\coco\augmentated\train\original\optical\cassette1_train_horizontal_flip.json


Processing sliced images: 100%|██████████| 576/576 [00:55<00:00, 10.44it/s]

Augmented annotations saved to ..\data\coco\augmentated\train\sliced\optical\cassette1_train_horizontal_flip.json





# Pixel Wise Augmentation

In [32]:
def adjust_brightness(
    image_dir,
    coco_annotations,
    output_dir,
    dataset_type="original",
    brightness_limit=0.2,  # Adjust brightness within this limit
    contrast_limit=0.2,    # Adjust contrast within this limit
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_pixel_value=255.0
):
    # Set augmentation type and create directories
    aug_type = "brightness_adjustment"
    AUG_SAVE_DIR = os.path.join(output_dir, dataset_type, aug_type, "each")
    os.makedirs(AUG_SAVE_DIR, exist_ok=True)

    # Define augmentation pipeline with RandomBrightnessContrast and normalization
    transform = A.Compose([
        A.RandomBrightnessContrast(
            brightness_limit=brightness_limit,
            contrast_limit=contrast_limit,
            p=1.0
        ),
        A.Normalize(mean=mean, std=std, max_pixel_value=max_pixel_value)
    ], bbox_params=A.BboxParams(format="coco", label_fields=["class_labels"]))

    # Process each image in coco_annotations
    new_annotations = []
    for img in tqdm(coco_annotations["images"], desc=f"Processing {dataset_type} images"):
        file_name = img["file_name"].replace(".bmp", ".png")
        image_id = img["id"]

        # Load the image
        image_path = os.path.join(image_dir, file_name)
        image = cv2.imread(image_path)

        if image is None:
            print(f"Image not found: {image_path}")
            continue

        # Collect bounding boxes and class labels
        bboxes = [ann["bbox"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]
        class_labels = [ann["category_id"] for ann in coco_annotations["annotations"] if ann["image_id"] == image_id]

        # Apply the transformation
        transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels)

        # Save the augmented image
        augmented_image = (transformed["image"] * max_pixel_value).clip(0, 255).astype("uint8")
        augmented_image_path = os.path.join(AUG_SAVE_DIR, f"{aug_type}_{file_name}")
        cv2.imwrite(augmented_image_path, augmented_image)

        # Update the new annotations with transformed bounding boxes
        for bbox, label in zip(transformed["bboxes"], class_labels):
            new_annotations.append({
                "image_id": image_id,
                "category_id": label,
                "bbox": bbox,
                "area": bbox[2] * bbox[3],
                "iscrowd": 0,
                "id": len(new_annotations) + 1
            })

    # Update COCO annotations with augmented bounding boxes
    coco_annotations["annotations"] = new_annotations

    # Save the updated COCO JSON annotations
    augmented_json_path = os.path.join(output_dir, f"{coco_file_name}_{aug_type}.json")
    save_json(coco_annotations, augmented_json_path)
    print(f"Augmented annotations saved to {augmented_json_path}")

# Run the function for both original and sliced cases
horizontal_flip(NEW_IMAGE_DIR, coco_dict, os.path.join(AUGMENTATION_PATH, target_split, "original", "brightness"), dataset_type="original")
horizontal_flip(SLICED_IMAGE_DIR, slc_dict, os.path.join(AUGMENTATION_PATH, target_split, "sliced", "brightness"), dataset_type="sliced")

Processing original images: 100%|██████████| 18/18 [00:42<00:00,  2.39s/it]


Augmented annotations saved to ..\data\coco\augmentated\train\original\brightness\cassette1_train_horizontal_flip.json


Processing sliced images: 100%|██████████| 576/576 [01:12<00:00,  7.98it/s]

Augmented annotations saved to ..\data\coco\augmentated\train\sliced\brightness\cassette1_train_horizontal_flip.json



