In [191]:
import os
import random

from data_aug.data_aug import *
from data_aug.bbox_util import *
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

# Data Augmentation

#### This notebook requires the original training images and their bounding-box annotations in CSV format, with each row containing the columns image, xmin, ymin, xmax, ymax (in that order)

In [196]:
# Annotation sources. `csv_file2` is the CSV written by the label-conversion
# cells at the bottom of this notebook, so those cells have to be run first.
csv_file = 'train_solution_bounding_boxes (1).csv'
csv_file2 = 'new_train_solution_bounding_boxes.csv'

# Load the boxes and set a 'class' column to 0 on every row.
df = pd.read_csv(csv_file2).assign(**{'class': 0})

In [197]:
# Number of annotation rows (one row per bounding box).
df['image'].shape[0]

440

In [198]:
# Number of distinct image names among the annotation rows.
df['image'].nunique(dropna=False)

282

In [199]:
# Build the augmented dataset: every pass over the unique source images writes
# one randomly augmented copy of each image and records its transformed boxes.
# NOTE(review): the random draws below are not seeded in this cell, so each run
# produces a different augmented set.
TARGET_AUGMENTED_IMAGES = 2500   # approximate number of augmented images to generate
augmented_image_folder_path = 'augmented_training_images'
augmented_annotations_csv = 'augmented_training_images_annotations.csv'

count = 0
header = [['image', 'xmin', 'ymin', 'xmax', 'ymax']]
imgs_list = df['image'].unique()
train_image_folder_path = "training_images"
train_image_folder_path2 = "new_data/images"

if len(imgs_list) == 0:
    raise ValueError('The annotation dataframe lists no images; nothing to augment.')

# cv2.imwrite returns False instead of raising when the target folder is missing.
os.makedirs(augmented_image_folder_path, exist_ok=True)

# The boxes of an image do not change between passes, so extract them once.
# Columns after dropping 'image': xmin, ymin, xmax, ymax, class.
bboxes_by_image = {
    name: df.loc[df['image'] == name].drop(columns=['image']).to_numpy()
    for name in imgs_list
}

n_passes = round(TARGET_AUGMENTED_IMAGES / len(imgs_list))
augmented_rows = []   # [image, xmin, ymin, xmax, ymax] records, written once at the end

for _ in range(n_passes):
    for current_img_name in imgs_list:
        img_path = os.path.join(train_image_folder_path2, current_img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            raise FileNotFoundError('Could not read image: {}'.format(img_path))
        img = img[:, :, ::-1]   # reverse the channel axis (OpenCV loads BGR -> RGB)

        bboxes = bboxes_by_image[current_img_name]

        randomHSV = np.random.randint(25, 60)
        randomScale = random.uniform(0.3, 0.7)
        randomTranslate = random.uniform(0.1, 0.5)

        seq = Sequence([
            RandomHSV(randomHSV, randomHSV, randomHSV),
            RandomHorizontalFlip(),
            RandomScale(randomScale),
            RandomTranslate(randomTranslate),
        ])
        img_, bboxes_ = seq(img.copy(), bboxes.copy())

        # splitext instead of slicing so extensions of any length are handled
        new_img_name = os.path.splitext(current_img_name)[0] + '_dg_' + str(count) + '.jpg'

        # Keep only the four coordinates; the class column is not needed in our case.
        augmented_rows.extend([new_img_name, *box[:4]] for box in bboxes_)

        # The channel axis was reversed after loading, but cv2.imwrite expects
        # BGR; reverse it again so the saved file does not have red and blue swapped.
        out_path = os.path.join(augmented_image_folder_path, new_img_name)
        if not cv2.imwrite(out_path, np.ascontiguousarray(img_[:, :, ::-1])):
            raise IOError('Could not write image: {}'.format(out_path))

        count = count + 1

# Single write (header + all rows) instead of re-opening the CSV for every image.
pd.DataFrame(augmented_rows, columns=header[0]).to_csv(augmented_annotations_csv, index=False)

print("Data Augmentation Completed.")

print("Total Images Processed : ", count)

Data Augmentation Completed.
Total Images Processed :  2538


#### The new annotation data will be saved in the file augmented_training_images_annotations.csv
#### The augmented images will be saved in the folder augmented_training_images

# Split Data into Train/Valid in YOLO format

In [200]:
# Reload the annotations written by the augmentation cell; the first CSV row
# (image, xmin, ymin, xmax, ymax) becomes the column names.
df2 = pd.read_csv(filepath_or_buffer='augmented_training_images_annotations.csv')

In [201]:
# Names of the augmented image files.
# os.listdir returns every directory entry in arbitrary order, so:
#  - sort, otherwise the seeded random.sample drawn from this list is not reproducible;
#  - keep only .jpg files (the augmentation cell writes .jpg), so stray entries
#    such as checkpoint folders never reach cv2.imread.
dg_imgs = sorted(
    name for name in os.listdir('augmented_training_images/')
    if name.lower().endswith('.jpg')
)

In [202]:
# Draw the validation subset of the augmented images.
# The share is currently 0, so no image is held out and everything goes to train.
n = round(len(dg_imgs) * 0)
random.seed(4)   # fixed seed so the same subset is drawn for the same input list
val = random.sample(population=dg_imgs, k=n)
print('Number of validation images:', n)

Number of validation images: 0


In [203]:
#convert bounding box to yolo format (x_center, y_center, width, height and normalize 0 - 1)
# The image size is read from the first augmented image and applied to every row.
# NOTE(review): this assumes all augmented images share one resolution — confirm.
if not dg_imgs:
    raise ValueError('No augmented images found in augmented_training_images/')

reference_image_path = 'augmented_training_images/' + dg_imgs[0]
frame = cv2.imread(reference_image_path)
if frame is None:
    # cv2.imread reports a missing/unreadable file by returning None
    raise FileNotFoundError('Could not read image: ' + reference_image_path)
h, w, colour = frame.shape

box_width = df2['xmax'] - df2['xmin']
box_height = df2['ymax'] - df2['ymin']

# Add the class id and the normalized centre/size columns, then drop the
# corner coordinates; resulting columns: image, class, x_center, y_center, width, height.
df2 = df2.assign(**{
    'class': 0,
    'x_center': (df2['xmin'] + box_width / 2) / w,
    'y_center': (df2['ymin'] + box_height / 2) / h,
    'width': box_width / w,
    'height': box_height / h,
}).drop(columns=['xmin', 'ymin', 'xmax', 'ymax'])

df2.head()

Unnamed: 0,image,class,x_center,y_center,width,height
0,vid_4_1000_dg_0.jpg,0,0.55038,0.675466,0.095733,0.132634
1,vid_4_10000_dg_1.jpg,0,0.061275,0.56134,0.12255,0.149986
2,vid_4_10020_dg_2.jpg,0,0.097262,0.224205,0.069643,0.054902
3,vid_4_10040_dg_3.jpg,0,0.377856,0.366443,0.173402,0.149909
4,vid_4_10060_dg_4.jpg,0,0.185009,0.720221,0.204759,0.162887


In [204]:
#Save images and labels - one text file per image
yolo_root = 'augmented_processed_data/yolo'

# Create the output folders up front: cv2.imwrite only returns False when the
# folder is missing, and to_csv raises.
for subset in ('train', 'valid'):
    os.makedirs(os.path.join(yolo_root, subset), exist_ok=True)

val_names = set(val)   # set for constant-time membership tests

for im in dg_imgs:
    src_path = 'augmented_training_images/{}'.format(im)
    frame = cv2.imread(src_path)
    if frame is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError('Could not read image: {}'.format(src_path))

    # Label rows of this image: class, x_center, y_center, width, height.
    df_image = df2[df2['image'] == im].drop(['image'], axis=1)
    lbl = os.path.splitext(im)[0] + '.txt'

    subset = 'valid' if im in val_names else 'train'
    image_out = os.path.join(yolo_root, subset, im)
    label_out = os.path.join(yolo_root, subset, lbl)

    if not cv2.imwrite(image_out, frame):
        raise IOError('Could not write image: {}'.format(image_out))

    # Overwrite (default mode 'w') rather than append: with mode='a' every
    # re-run of this cell duplicated all label lines in the existing files.
    df_image.to_csv(label_out, header=False, index=False, sep=' ')

### All processed image files in YOLO format are saved inside the augmented_processed_data folder

### Reference:  https://github.com/Paperspace/DataAugmentationForObjectDetection

In [163]:
# Source folders of the new dataset.
label_folder_path = 'new_data/labels'
image_folder_path = 'new_data/images'

# os.listdir returns every entry in arbitrary order; sort so the generated CSV
# is reproducible and keep only .txt files so other entries are not parsed as labels.
labels = sorted(
    name for name in os.listdir(label_folder_path)
    if name.lower().endswith('.txt')
)

In [164]:
# Read one image to obtain the pixel size (h, w) that the conversion cell
# below uses to de-normalize every label.
# NOTE(review): this assumes all images in the folder share this resolution — confirm.
reference_image_path = image_folder_path + '/vid_4_600.jpg'
frame = cv2.imread(reference_image_path)
if frame is None:
    # cv2.imread reports a missing/unreadable file by returning None
    raise FileNotFoundError('Could not read image: ' + reference_image_path)
h, w, colour = frame.shape

print('Height : ', h)
print('Width : ', w)

Height :  380
Width :  676


In [189]:
import csv

In [190]:
# Convert the per-image label files into a single CSV of corner-format boxes.
# Each non-empty label line is split on whitespace; fields 1-4 are read as the
# normalized x_center, y_center, width and height (field 0 is ignored —
# presumably the class id) and scaled by the reference image size (w, h).
header = [['image', 'xmin', 'ymin', 'xmax', 'ymax']]
records = []

for label_name in labels:
    # `with` closes the file handle, which the plain open() call never did
    with open(label_folder_path + "/" + label_name, "r") as f:
        lines = f.readlines()

    img_file_name = os.path.splitext(label_name)[0] + '.jpg'

    for raw_line in lines:
        fields = raw_line.split()
        if not fields:
            # blank line (e.g. trailing newline at end of file): nothing to convert
            continue

        x_center = float(fields[1])
        y_center = float(fields[2])
        normalized_width = float(fields[3])
        normalized_height = float(fields[4])

        xmin = x_center * w - ((normalized_width * w) / 2)
        ymin = y_center * h - ((normalized_height * h) / 2)
        xmax = xmin + (normalized_width * w)
        ymax = ymin + (normalized_height * h)

        records.append([img_file_name, xmin, ymin, xmax, ymax])

# One write (header + all rows) instead of re-opening the CSV for every box;
# with no records only the header line is written.
pd.DataFrame(records, columns=header[0]).to_csv(
    'new_train_solution_bounding_boxes.csv', index=False)

print("New Image Files Created")

New Image Files Created
