In [None]:
!pip install -q kaggle

In [None]:
from google.colab import files

files.upload() # you should have generated an API token (kaggle.json) from kaggle/settings

In [None]:
# -p keeps the cell re-runnable: plain mkdir errors out when ~/.kaggle already exists.
!mkdir -p ~/.kaggle

!cp kaggle.json ~/.kaggle/

In [None]:
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle competitions download -c 'ada-image-recognition-fiber'

In [None]:
# -p keeps the cell re-runnable: plain mkdir errors out when the directory already exists.
!mkdir -p ada-image-recognition-fiber

In [None]:
!unzip ada-image-recognition-fiber.zip -d ada-image-recognition-fiber

In [None]:
!git clone https://github.com/albumentations-team/albumentations.git

In [None]:
# Patch the cloned albumentations sources: on every line that contains
# `text_to_find`, replace "True" with "False" (presumably turning
# check_validity=True into check_validity=False -- confirm against the clone).
target_file = "albumentations/albumentations/core/bbox_utils.py"
text_to_find = "return convert_bboxes_to_albumentations(data, self.params.format, rows, cols, check_validity"

with open(target_file) as f:
    lines = f.readlines()

patched_any = False
for i, l in enumerate(lines):
    if text_to_find in l:
        lines[i] = l.replace("True", "False")
        patched_any = True

# The repository is cloned without a pinned revision, so the expected line may
# no longer exist. Fail loudly instead of silently installing an unpatched
# library.
if not patched_any:
    raise RuntimeError(
        f"Could not find the line to patch in {target_file}; "
        "the albumentations sources may have changed."
    )

with open(target_file, 'w') as file:
    file.writelines(lines)

In [None]:
!pip uninstall albumentations --yes

Found existing installation: albumentations 1.2.1
Uninstalling albumentations-1.2.1:
  Successfully uninstalled albumentations-1.2.1


In [None]:
!pip install --user albumentations/

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Processing ./albumentations
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: albumentations
  Building wheel for albumentations (setup.py) ... [?25l[?25hdone
  Created wheel for albumentations: filename=albumentations-1.3.0-py3-none-any.whl size=125707 sha256=94b1dc51c57e07d265b00bf1919799d1be6ccee4cb0bcd2bc85d4a9961c12ad6
  Stored in directory: /tmp/pip-ephem-wheel-cache-1fk7gih4/wheels/f8/d9/cc/e2118e18bd1ec7721538676ef74360bcdfe48097bdb504595a
Successfully built albumentations
Installing collected packages: albumentations
  Attempting uninstall: albumentations
    Found existing installation: albumentations 1.3.0
    Uninstalling albumentations-1.3.0:
      Successfully uninstalled albumentations-1.3.0
Successfully installed albumentations-1.3.0


In [None]:
import albumentations
print(albumentations.__version__)

1.3.0


**Using Albumentations library**

In [None]:
"""
Create a dictionary (named data) with image names as keys and, for each image,
an inner dictionary with keys "labels" and "bboxes"
- labels is a list containing all labels detected in the image
- bboxes is a list with one list of box coordinates per object in the image
"""

import os
import shutil

TRAIN_LABELS_DIR = "ada-image-recognition-fiber/dataset/labels/train/"

# Sorted so that the processing order does not depend on the filesystem.
label_and_bboxes = sorted(os.listdir(TRAIN_LABELS_DIR))

# data maps an image name (label file name without its extension) to
# {"bboxes": [[float, ...], ...], "labels": [int, ...]}.
data = dict()
for file in label_and_bboxes:
    path = os.path.join(TRAIN_LABELS_DIR, file)
    # Skip sub-directories and other non-file entries, which open() cannot read.
    if not os.path.isfile(path):
        continue
    # splitext removes only the final extension, so "img.v2.txt" -> "img.v2"
    # (split(".")[0] would have truncated it to "img").
    name = os.path.splitext(file)[0]
    bboxes = list()
    labels = list()
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank lines (e.g. a trailing newline) carry no annotation.
            if not fields:
                continue
            # First field is the integer class label, the rest are box coordinates.
            labels.append(int(fields[0]))
            bboxes.append(list(map(float, fields[1:])))
    data[name] = {"bboxes": bboxes, "labels": labels}

In [None]:
"""
Launch the augmentation pipeline.
source_directory: directory holding the source images, e.g. "ada-image-recognition-fiber/dataset/images/train/"
bboxes_data: contains, for each image in the source directory, its bboxes and labels
where: name of the folder for the augmented data; it is created next to the source folder under the images dir, and under the labels dir as well
"""

LABELS_DIR = "ada-image-recognition-fiber/dataset/labels/"
IMAGE_DIR = "ada-image-recognition-fiber/dataset/images/"

import cv2
import numpy as np
from PIL import Image

def augmentation_pipeline(transformation_fn, n_iter, source_directory, bboxes_data, augmented_data_dir, augmented_labels_dir):
    """
    Apply one augmentation pipeline to every annotated image and save the results.

    For each entry of ``bboxes_data`` the image ``<name>.jpg`` is read from
    ``source_directory``, passed through ``transformation_fn`` together with its
    boxes and labels, and written as ``aug_<n_iter>_<name>.jpg`` /
    ``aug_<n_iter>_<name>.txt``. Images for which no label is returned by the
    transform are skipped (nothing is written for them).

    :param transformation_fn: callable accepting ``image``, ``bboxes`` and
        ``class_labels`` keyword arguments and returning a mapping with the
        same three keys.
    :param n_iter: identifier of this augmentation pass, used in file names.
    :param source_directory: directory containing the source ``.jpg`` images.
    :param bboxes_data: mapping ``name -> {"bboxes": [...], "labels": [...]}``.
    :param augmented_data_dir: output directory for augmented images.
    :param augmented_labels_dir: output directory for augmented label files.
    :raises IOError: if an augmented image cannot be written.
    """
    total = len(bboxes_data)
    for i, (name, annotation) in enumerate(bboxes_data.items()):
        img_path = os.path.join(source_directory, name + ".jpg")
        # Load as 3-channel RGB and release the file handle right away.
        with Image.open(img_path) as pil_image:
            image = np.array(pil_image.convert("RGB"))

        transformed = transformation_fn(image=image, bboxes=annotation["bboxes"], class_labels=annotation["labels"])
        transformed_bboxes = transformed['bboxes']
        transformed_class_labels = transformed['class_labels']
        if len(transformed_class_labels) == 0:
            continue

        stem = "aug_" + str(n_iter) + "_" + name

        # The array is RGB (it comes from PIL) while cv2.imwrite expects BGR;
        # without this conversion red and blue are swapped in the saved file.
        bgr_image = cv2.cvtColor(transformed['image'], cv2.COLOR_RGB2BGR)
        out_image_path = os.path.join(augmented_data_dir, stem + '.jpg')
        # cv2.imwrite reports failure through its return value; do not write
        # a label file for an image that was never saved.
        if not cv2.imwrite(out_image_path, bgr_image):
            raise IOError(f"Could not write augmented image to {out_image_path}")

        # One "<label> <coord> <coord> ..." line per remaining box.
        label_lines = [
            f"{lab} " + " ".join(map(str, bb)) + "\n"
            for lab, bb in zip(transformed_class_labels, transformed_bboxes)
        ]
        with open(os.path.join(augmented_labels_dir, stem + '.txt'), 'w') as f:
            f.writelines(label_lines)
        print("iteration", n_iter, "image", i+1, "over", total)
    print("Done for iteration", n_iter)

In [None]:
"""
Prepare the pipeline of different transformations to perform for the augmentation
"""

import albumentations as A
import random
import itertools

# Pool of optional transforms. Further down in this cell,
# n_transforms_per_image of them are combined to form each pipeline.
transformations = [
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Rotate(limit=(-10, 10), p=0.5),
    A.Perspective(scale=(0.05, 0.1), p=0.5),
    A.GridDistortion(p=0.5),
    A.ElasticTransform(p=0.5),
    A.RandomSnow(p=0.5),
    A.RandomRain(p=0.5),
    A.RandomFog(p=0.5),
    A.RandomShadow(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=20, p=0.5),
    # Candidates that are currently disabled:
    # A.mixup(p=0.5),
    #A.Cutout(num_holes=1, max_h_size=64, max_w_size=64, p=0.5),
    #A.CoarseDropout(max_holes=4, max_height=32, max_width=32, min_holes=1, min_height=8, min_width=8, fill_value=0, p=0.2),
    # A.RandomErasing(p=0.5),
]

# Transforms placed at the front of every pipeline (they are prepended to each
# sampled combination below): a resize to 640x640, then a blur group and a
# brightness/gamma/CLAHE group, each group with p=0.2.
fix_transformations = [
    A.Resize(width=640, height=640),
    A.OneOf([
        A.Blur(blur_limit=3, p=0.5),
        A.GaussianBlur(blur_limit=3, p=0.5),
        A.MedianBlur(blur_limit=3, p=0.5),
    ], p=0.2),
    A.OneOf([
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
        A.RandomGamma(gamma_limit=(80, 120), p=0.5),
        A.CLAHE(clip_limit=(1, 4), p=0.5),
    ], p=0.2)
]

n_transforms_per_image = 4
n_augmentations_per_image = 8

# Seed for choosing which transform combinations are used. A dedicated
# generator makes the selection reproducible across kernel restarts without
# touching the global `random` state.
AUGMENTATION_SEED = 42
selection_rng = random.Random(AUGMENTATION_SEED)

# Every way of picking n_transforms_per_image transforms from the pool ...
combinations = list(itertools.combinations(transformations, n_transforms_per_image))
# ... of which n_augmentations_per_image distinct ones are kept.
results = selection_rng.sample(combinations, n_augmentations_per_image)

# Each pipeline is the fixed transforms followed by its sampled combination.
results = [fix_transformations + list(combo) for combo in results]



In [None]:
import concurrent.futures

source_dir = "ada-image-recognition-fiber/dataset/images/train/"
where = "aug_train"
augmented_labels_dir = LABELS_DIR + where
augmented_data_dir = IMAGE_DIR + where

# Start every run from empty output folders: drop whatever a previous run left
# behind, then recreate the directory (labels first, then images).
for output_dir in (augmented_labels_dir, augmented_data_dir):
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)


def parallel_augmentation(n, results, source_dir, data):
    """
    Run augmentation pass number ``n + 1`` over the whole dataset.

    Builds a composed transform from ``results[n]`` (YOLO-format boxes, labels
    carried in ``class_labels``) and hands it to ``augmentation_pipeline``,
    writing into the module-level ``augmented_data_dir`` and
    ``augmented_labels_dir``.

    :param n: zero-based index of the pipeline to run.
    :param results: list of transform lists, one per pipeline.
    :param source_dir: directory containing the source images.
    :param data: mapping ``name -> {"bboxes": [...], "labels": [...]}``.
    :return: None
    """
    # Local imports keep this function self-contained when it runs in a worker.
    import random
    import numpy as np

    # Worker processes created by fork start with a copy of the parent's RNG
    # state, so tasks running in different workers would otherwise draw the
    # same random sequence. Seeding per task decorrelates the passes and makes
    # each one reproducible.
    # NOTE(review): assumes albumentations draws from `random` / `np.random`
    # -- confirm for the installed version.
    random.seed(n)
    np.random.seed(n)

    transform = A.Compose(results[n], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'], check_each_transform=False))
    augmentation_pipeline(transform, n+1, source_dir, data, augmented_data_dir, augmented_labels_dir)
    return None


# Fan the augmentation passes out over a process pool, one task per pass.
with concurrent.futures.ProcessPoolExecutor() as executor:
    futures = []
    for n in range(n_augmentations_per_image):
        futures.append(executor.submit(parallel_augmentation, n, results, source_dir, data))

    # Collect in completion order; .result() re-raises any exception that
    # occurred inside a worker.
    for future in concurrent.futures.as_completed(futures):
        result = future.result()


# for n in range(n_augmentations_per_image):
#     transform = A.Compose(results[n], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'], check_each_transform=False))
#     augmentation_pipeline(transform, n+1, source_dir, data, augmented_data_dir, augmented_labels_dir)

In [None]:
import zipfile
import os

def zip_folder(folder_path, zip_path):
    """
    Compresses a folder into a ZIP archive.

    Files are stored under their path relative to ``folder_path`` and added in
    sorted order, so the archive layout does not depend on the filesystem.
    If the archive itself is located inside ``folder_path`` it is left out.

    :param folder_path: Path to the folder to be compressed.
    :param zip_path: Path to the output ZIP archive.
    :raises NotADirectoryError: If ``folder_path`` is not an existing directory.
    """

    # os.walk yields nothing for a missing folder, which would silently
    # produce an empty archive; fail before creating any file instead.
    if not os.path.isdir(folder_path):
        raise NotADirectoryError(f"Folder to compress not found: {folder_path}")

    archive_abs = os.path.abspath(zip_path)

    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            # Sorting in place also fixes the order in which os.walk descends.
            dirs.sort()
            for file in sorted(files):
                file_path = os.path.join(root, file)
                # The archive already exists on disk at this point; never add
                # it to itself.
                if os.path.abspath(file_path) == archive_abs:
                    continue
                zipf.write(file_path, os.path.relpath(file_path, folder_path))

# NOTE(review): 'yolov5/' is not created by any cell visible in this notebook;
# presumably it comes from a separate training step -- confirm it exists before running.
zip_folder('yolov5/', 'res_80_epochs_sgd.zip')
# zip_folder('ada-image-recognition-fiber/dataset/images/aug_train/', 'aug_img.zip')
# zip_folder('ada-image-recognition-fiber/dataset/labels/aug_train/', 'aug_lbl.zip')