https://github.com/aleju/imgaug

In [None]:
!conda config --add channels conda-forge
!conda install imgaug


In [None]:
# Alternative to the conda install above: install imgaug with pip.
!pip install imgaug

In [None]:
import os
import glob
import pandas as pd

import imageio
import re
import cv2
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Seed imgaug's global random state so augmentation results are reproducible.
ia.seed(1)

In [None]:
# Augmentation ......
#
# For every image listed in the input annotation CSV, generate
# `number_of_aug_images` randomly transformed copies, write them to
# `output_path` as "aug_<index>_<original name>", and record the transformed
# bounding boxes in a new annotation CSV with the same column layout.

input_path = 'data/input/hardhat/train/'
output_path = 'data/output/hardhat/train/'

# Number of augmented images from a single image.
number_of_aug_images = 5

# Column layout of the annotation CSV written at the end.
annotation_columns = ['filename', 'width', 'height', 'class',
                      'xmin', 'ymin', 'xmax', 'ymax']

# Neither imageio.imwrite nor DataFrame.to_csv creates missing directories.
os.makedirs(output_path, exist_ok=True)

# Read input csv annotation file
df = pd.read_csv(input_path + '_annotations.csv')

# Group the boundary box information by filename
filename_group = df.groupby("filename")

# The pipeline definition does not depend on the image, so build it once.
# Random parameters are still sampled anew on every call to `seq(...)`.
seq = iaa.Sequential([
    iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
        rotate=(-15, 15),
        shear=(-8, 8)
    )
], random_order=True)

# Collect one row per augmented bounding box and build the data frame once at
# the end; concatenating a one-row frame per box is quadratic in the row count.
aug_rows = []

for name, groups in filename_group:

    image = imageio.imread(input_path + name)

    # One imgaug bounding box per annotation row of this image.
    boxes = [
        BoundingBox(x1=int(obj['xmin']), y1=int(obj['ymin']),
                    x2=int(obj['xmax']), y2=int(obj['ymax']),
                    label=obj['class'])
        for _, obj in groups.iterrows()
    ]
    bbs = BoundingBoxesOnImage(boxes, shape=image.shape)

    for aug_index in range(number_of_aug_images):

        image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)

        # disregard bounding boxes which have fallen out of image pane, then
        # clip bounding boxes which are partially outside of image pane
        bbs_aug = bbs_aug.remove_out_of_image().clip_out_of_image()

        # don't perform any actions with the image if there are no bounding
        # boxes left in it
        if not bbs_aug.bounding_boxes:
            continue

        # write augmented image to a file
        aug_name = "aug_" + str(aug_index) + "_" + name
        imageio.imwrite(output_path + aug_name, image_aug)

        # image arrays are (height, width, ...), the CSV stores width first
        aug_height, aug_width = image_aug.shape[0], image_aug.shape[1]
        for bbs_box in bbs_aug.bounding_boxes:
            aug_rows.append([aug_name, aug_width, aug_height, bbs_box.label,
                             bbs_box.x1, bbs_box.y1, bbs_box.x2, bbs_box.y2])

# data frame holding the augmented image info
aug_bbs_xy = pd.DataFrame(aug_rows, columns=annotation_columns)

# Export final CSV along with augmented images boundary box information.
aug_bbs_xy.to_csv(output_path + '_annotations.csv', index=False)

print("Augmentation is done.....!!!!!!")
    
        




    

In [None]:
# Convert CSV to yolo format annotations
def convert(size, box):
    """Convert a pixel-space box to a normalized (x, y, w, h) tuple.

    Args:
        size: Indexable whose first two items are the image width and the
            image height in pixels.
        box: Indexable ordered as (xmin, xmax, ymin, ymax) -- both x values
            come before both y values.

    Returns:
        Tuple ``(x, y, w, h)``: the box centre and the box extent, each
        multiplied by the reciprocal of the matching image dimension.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    left, right = box[0], box[1]
    top, bottom = box[2], box[3]

    # NOTE(review): the "- 1" moves the centre one pixel towards the origin,
    # presumably to compensate for 1-based pixel coordinates -- confirm that
    # this matches how the annotations were produced.
    centre_x = ((left + right) / 2.0 - 1) * scale_x
    centre_y = ((top + bottom) / 2.0 - 1) * scale_y
    span_x = (right - left) * scale_x
    span_y = (bottom - top) * scale_y
    return (centre_x, centre_y, span_x, span_y)

In [None]:
# Convert CSV to yolo format...
#
# For every image listed in the CSV, write a "<image name without
# extension>.txt" file into `output_path` with one line per bounding box:
# "<class id> <x> <y> <w> <h>", where the four numbers come from `convert`.

input_path = 'data/input/openimage-helmet/helmet/'
output_path = 'data/input/openimage-helmet/helmet/'

# A box's class id is the index of its class name in this list; boxes whose
# class is not listed are skipped.
classes = ['head', 'helmet']

df = pd.read_csv(input_path + 'labels_helmet_train.csv')

filename_group = df.groupby("filename")


for name, groups in filename_group:

    file_name = input_path + name

    basename = os.path.basename(file_name)
    basename_no_ext = os.path.splitext(basename)[0]

    # The image is loaded only to obtain its pixel dimensions.
    image = imageio.imread(file_name)

    w = int(image.shape[1])
    h = int(image.shape[0])

    # Open equivalent txt file for each image. The context manager closes
    # (and thereby flushes) the file before the next image is processed.
    with open(output_path + basename_no_ext + '.txt', 'w') as out_file:

        for key, obj in groups.iterrows():

            cls = obj['class']
            if cls not in classes:
                continue
            cls_id = classes.index(cls)

            # `convert` expects the box as (xmin, xmax, ymin, ymax).
            b = (float(obj['xmin']), float(obj['xmax']),
                 float(obj['ymin']), float(obj['ymax']))
            bb = convert((w, h), b)
            # Write boundary box information in yolo format
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

print("Conversion is done.....")