In [1]:
import cv2
import os
import random
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

In [2]:
############################################## PREPARE DATA ##############################################

# Directory containing all labeled data: every image (.jpg) is expected to have a
# label file with the same base name (.txt) next to it.
# NOTE: paths in this notebook are joined by plain string concatenation,
# so every directory path must end with a trailing '/'.
data_path = 'labeled_set/'

# Output directories for the training and the test split.
training_path = 'training_set/'
test_path = 'test_set/'

# Number of images assigned to the training set; all remaining images form the test set.
num_train_data = 25

# Images are renamed to new_img_name + img_count, where img_count starts at start_value
# (with the values below: 'mps_0', 'mps_1', ...).
new_img_name = 'mps_'
start_value = 0

In [None]:
# Create the training and test folders if they do not exist yet.
# Re-running this cell is safe: an already existing folder is reported as such
# instead of being reported as a failure.
for folder in [training_path, test_path]:
    if os.path.isdir(folder):
        print ("Folder %s already exists" % folder)
        continue
    try:
        # exist_ok guards against the folder appearing between the check and the call
        os.makedirs(folder, exist_ok=True)
    except OSError as err:
        # keep the notebook running (best effort), but show why the creation failed
        print ("Creation of the folder %s failed: %s" % (folder, err))
    else:
        print ("Successfully created the folder %s " % folder)

In [None]:
# Randomly split the labeled data into a training and a test set.
# NOTE: the shuffle is not seeded, so every run produces a different split.
entries = os.listdir(data_path)
jpg_files = [image_file for image_file in entries if image_file.endswith(".jpg")]

random.shuffle(jpg_files)

training_set = jpg_files[:num_train_data]
test_set = jpg_files[num_train_data:]
if not test_set:
    print("Warning: only %d images found, the test set is empty (num_train_data = %d)"
          % (len(jpg_files), num_train_data))


def _copy_renamed(files, target_path, first_index):
    """Copy images and their label files into target_path under new names.

    Each image in `files` (file names inside data_path) is re-saved as
    new_img_name + running index + ".jpg"; its label file (.txt with the same
    base name) is copied under the same new base name.

    Parameters:
        files: list of .jpg file names located in data_path.
        target_path: destination directory (with trailing '/').
        first_index: index used for the first renamed file.

    Returns:
        The next unused index, so consecutive calls continue the numbering.

    Raises:
        IOError: if an image cannot be read or written.
        OSError: if a label file cannot be read or written.
    """
    index = first_index
    for file in files:
        original_file_name = os.path.splitext(file)[0]
        new_file_name = new_img_name + str(index)

        # cv2.imread signals failure by returning None instead of raising
        img = cv2.imread(data_path + file)
        if img is None:
            raise IOError("Could not read image %s" % (data_path + file))

        # Read the label before writing anything, so a missing label file
        # does not leave an image with an empty label in the target folder.
        with open(data_path + original_file_name + ".txt", 'r') as original_file:
            label_text = original_file.read()

        # cv2.imwrite signals failure by returning False
        if not cv2.imwrite(target_path + new_file_name + ".jpg", img):
            raise IOError("Could not write image %s" % (target_path + new_file_name + ".jpg"))
        with open(target_path + new_file_name + ".txt", 'w') as new_file:
            new_file.write(label_text)

        index += 1
        print(index)
    return index


# rename them as new_img_name + img_count and save them in training_set and test_set folder;
# the numbering of the test set continues where the training set stopped
img_count = _copy_renamed(training_set, training_path, start_value)
img_count = _copy_renamed(test_set, test_path, img_count)

In [6]:
############################################## AUGMENTATION ##############################################

# Output directory for the augmented data set (trailing '/' required, paths are concatenated).
augmented_path = "augmented_set/"

# Augmentation sequence applied to every (already rotated) copy of a training image.
# The descriptions below follow the imgaug documentation — confirm against the installed version.
seq = iaa.Sequential([
    # Crop (negative values) or pad (positive values) the image borders by a random
    # fraction of the image size taken from the range -15% .. +25%.
    iaa.CropAndPad(percent=(-0.15, 0.25)),
    # With probability 0.5: leave the bounding-box areas as they are and modify only the
    # background. The background is overlaid with a regular grid (7-30 rows, 6-24 columns);
    # each cell draws its alpha from the list below, i.e. presumably about 1 in 10 cells is
    # replaced by the foreground branch Multiply(0.0) (black).
    iaa.Sometimes(0.5, iaa.BlendAlphaBoundingBoxes(None, 
        background=iaa.BlendAlphaRegularGrid(nb_rows=(7, 30), nb_cols=(6, 24),
            foreground=iaa.Multiply(0.0),
            alpha=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]))),
    # With probability 0.5: drop about 2% of the pixels, using a dropout mask generated
    # at 50% of the image resolution (coarse rectangular holes).
    iaa.Sometimes(0.5, iaa.CoarseDropout(0.02, size_percent=0.5))
])

# Number of augmented copies per image. Copy i is rotated by i * 90° before the
# sequence above is applied, so with 8 copies every orientation is used twice.
num_copy = 8

In [None]:
# Create the folder for the augmented data if it does not exist yet.
# Re-running this cell is safe: an already existing folder is reported as such
# instead of being reported as a failure.
if os.path.isdir(augmented_path):
    print ("Folder %s already exists" % augmented_path)
else:
    try:
        # exist_ok guards against the folder appearing between the check and the call
        os.makedirs(augmented_path, exist_ok=True)
    except OSError as err:
        # keep the notebook running (best effort), but show why the creation failed
        print ("Creation of the folder %s failed: %s" % (augmented_path, err))
    else:
        print ("Successfully created the folder %s " % augmented_path)

In [None]:
# Create num_copy augmented versions of every training image together with matching
# YOLO label files. For an image "<name>.jpg" the outputs are written to
# augmented_path as "<name>_<i>.jpg" / "<name>_<i>.txt" with i in 0 .. num_copy-1.
entries = os.listdir(training_path)
for image_file in entries:
    if(not image_file.endswith(".jpg")):
        continue
    print(image_file)

    #load image (cv2.imread returns None instead of raising when the file is unreadable)
    img = cv2.imread(training_path+image_file)
    if img is None:
        raise IOError("Could not read image %s" % (training_path+image_file))
    img_height = img.shape[0]
    img_width = img.shape[1]

    #load label
    original_file_name = os.path.splitext(image_file)[0]
    with open(training_path+original_file_name+".txt", "r") as label:
        yolo_labels = label.readlines()

    #convert into imgaug bounding box
    #YOLO line: "<class> <x_center> <y_center> <width> <height>", all coordinates relative to the image size
    box_array = []
    for box in yolo_labels:
        bb_values = box.split()
        if not bb_values:
            #blank line (e.g. trailing newline at the end of the file)
            continue
        class_number = int(bb_values[0])
        yolo_x = float(bb_values[1])
        yolo_y = float(bb_values[2])
        yolo_width = float(bb_values[3])
        yolo_height = float(bb_values[4])

        box_width = yolo_width * img_width
        box_height = yolo_height * img_height
        x_min = int(yolo_x * img_width - (box_width / 2))
        y_min = int(yolo_y * img_height - (box_height / 2))
        x_max = int(yolo_x * img_width + (box_width / 2))
        y_max = int(yolo_y * img_height + (box_height / 2))
        box_array.append(BoundingBox(x_min, y_min, x_max, y_max, label=class_number))
    bbs = BoundingBoxesOnImage(box_array, shape=img.shape)

    #ia.imshow(bbs.draw_on_image(img))

    for i in range(num_copy):

        ### augmentation ###

        #rotate by i * 90°
        rot_image = iaa.Rot90((i), keep_size=False)
        image_rot, bbs_rot = rot_image(image=img, bounding_boxes=bbs)
        #apply augmentation sequence
        image_aug, bbs_aug = seq(image=image_rot, bounding_boxes=bbs_rot)
        #clip bounding boxes to image and remove them if <= 50% are left
        bbs_aug = bbs_aug.remove_out_of_image_fraction(0.5).clip_out_of_image()
        #ia.imshow(bbs_aug.draw_on_image(image_aug))

        #convert back to relative YOLO coordinates and save
        aug_height = image_aug.shape[0]
        aug_width = image_aug.shape[1]
        file_name = original_file_name+"_"+str(i)
        #'w' (not 'a'): re-running the cell must replace the labels instead of appending duplicates
        with open(augmented_path+file_name+".txt", 'w') as aug_label_file:
            for bb in bbs_aug.bounding_boxes:
                #fixed-point formatting: truncating str(float) would corrupt values that
                #print in scientific notation (e.g. '1.2345678e-05'[:8] == '1.234567')
                yolo_label = "%s %.6f %.6f %.6f %.6f\n" % (
                    bb.label,
                    bb.center_x / aug_width,
                    bb.center_y / aug_height,
                    bb.width / aug_width,
                    bb.height / aug_height,
                )
                aug_label_file.write(yolo_label)
        #cv2.imwrite signals failure by returning False
        if not cv2.imwrite(augmented_path+file_name+".jpg", image_aug):
            raise IOError("Could not write image %s" % (augmented_path+file_name+".jpg"))
    #break #use to test on a single image

In [None]:
############################################## CREATE TXT FILES ##############################################

# Path, as seen from the darknet directory, to the folder holding the training and test
# data files. It is prepended to every image name written to the list files below.
yolo_data_path = 'test_yolo/obj/'

# Text files listing every training and every test image (one path per line).
# Each line is yolo_data_path + image file name, i.e. the path from the darknet binary
# to the image file.
train_txt_file = 'train.txt'
test_txt_file = 'test.txt'

# Directories whose .jpg files are listed in the two text files.
# augmented_path is used for training since the augmented data is what gets trained on.
train_txt_path = augmented_path
test_txt_path = test_path

In [None]:
# make test.txt and train.txt with jpg names (needed for training with darknet)

def _write_image_list(list_file, image_dir):
    """Write the darknet image list for all .jpg files found in image_dir.

    Every line of list_file is yolo_data_path + image file name.

    Parameters:
        list_file: path of the text file to create.
        image_dir: directory that is scanned for .jpg files.

    The file is opened with 'w' so re-running the cell replaces the list
    instead of appending duplicate entries; names are sorted because the
    order returned by os.listdir is arbitrary.
    """
    image_names = sorted(name for name in os.listdir(image_dir) if name.endswith(".jpg"))
    with open(list_file, 'w') as names_file:
        for name in image_names:
            names_file.write(yolo_data_path+name+"\n")


_write_image_list(train_txt_file, train_txt_path)
_write_image_list(test_txt_file, test_txt_path)