In [1]:
from data_aug.data_aug import *
from data_aug.bbox_util import *
# Explicit imports placed after the star imports so these names are guaranteed
# to be the standard-library modules rather than whatever the star imports export.
import os
import random

import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

# Data Augmentation

#### This notebook requires the original training images and their bounding-box annotations in CSV format, with each row having its columns in the order image, xmin, ymin, xmax, ymax

In [37]:
# Load the original bounding-box annotations and append a constant `class`
# column (single-class problem, every box gets label 0).
df = pd.read_csv('train_solution_bounding_boxes (1).csv').assign(**{'class': 0})

In [38]:
# Total number of annotated boxes (one row per box).
df['image'].shape[0]

559

In [39]:
# Number of distinct images that have at least one annotated box.
len(pd.unique(df['image']))

355

In [40]:
# Distinct image file names, in order of first appearance in the annotations.
imgs_list = pd.unique(df['image'])

In [41]:
# Start a fresh annotation file that contains only the column-name row;
# the augmentation loop appends its data rows to this file afterwards.
header = [['image', 'xmin', 'ymin', 'xmax', 'ymax']]
header_row = pd.DataFrame(data=header)
header_row.to_csv('augmented_training_images_annotations.csv', header=False, index=False)

In [42]:
# Augment every annotated training image once and save the result.
#
# For each image: load it, apply a random colour / flip / scale / translate
# pipeline to the image and its boxes, write the augmented image as
# '<name>_dg.jpg', and append the transformed boxes to the annotation CSV.
#
# NOTE: rows are appended (mode='a'), so re-run the cell that writes the CSV
# header before re-running this one, otherwise annotations are duplicated.

# cv2.imwrite does not create missing directories (it just returns False).
os.makedirs('augmented_training_images', exist_ok=True)

for current_img_name in imgs_list:
    img_bgr = cv2.imread('training_images/{}'.format(current_img_name))
    if img_bgr is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            'Could not read training_images/{}'.format(current_img_name))
    # OpenCV loads BGR; the augmentation pipeline is fed RGB, as before.
    img = img_bgr[:, :, ::-1]

    # Rows of [xmin, ymin, xmax, ymax, class] for this image.
    df_image = df[df['image'] == current_img_name]
    df_image = df_image.drop(['image'], axis=1)
    bboxes = df_image.to_numpy()

    seq = Sequence([RandomHSV(50, 50, 50), RandomHorizontalFlip(), RandomScale(0.3), RandomTranslate(0.3)])
    img_, bboxes_ = seq(img.copy(), bboxes.copy())

    new_img_name = current_img_name[0:-4] + '_dg.jpg'
    img_names = [new_img_name] * (bboxes_.shape[0])
    df_bboxes_ = pd.DataFrame(bboxes_)
    df_bboxes_ = df_bboxes_.drop([4], axis=1)   # drop the class column as it is not needed in our case
    df_bboxes_.insert(0, 'image', img_names)

    # cv2.imwrite expects BGR, so flip the channels back before saving;
    # writing the RGB array directly would store images with red/blue swapped.
    img_bgr_out = np.ascontiguousarray(img_[:, :, ::-1])
    if not cv2.imwrite('augmented_training_images/{}'.format(new_img_name), img_bgr_out):
        raise IOError(
            'Could not write augmented_training_images/{}'.format(new_img_name))
    df_bboxes_.to_csv('augmented_training_images_annotations.csv', index=False, header=False, mode='a')

print("Data Augmentation Completed.")

Data Augmentation Completed.


#### The new annotation data will be saved in the file augmented_training_images_annotations.csv
#### The augmented images will be saved in the folder augmented_training_images

# Split Data into Train/Valid in YOLO format

In [43]:
# Reload the augmented annotations; the first line of the file is the header row.
df2 = pd.read_csv('augmented_training_images_annotations.csv', sep=',', header=0)

In [44]:
# List the augmented images in a deterministic order.
# os.listdir returns entries in arbitrary order, so without sorting the seeded
# validation sample drawn from this list would not be reproducible. Non-JPEG
# entries (hidden files, checkpoints, ...) are skipped because every later
# step passes these names to cv2.imread.
dg_imgs = sorted(
    name for name in os.listdir('augmented_training_images/')
    if name.lower().endswith('.jpg')
)

In [46]:
#create validation set of images
random.seed(4)
n = round(0.2 * len(dg_imgs))
val = random.sample(dg_imgs, n)
print('Number of validation images:', n)

Number of validation images: 71


In [47]:
#convert bounding box to yolo format (x_center, y_center, width, height and normalize 0 - 1)
frame = cv2.imread('augmented_training_images/' + dg_imgs[0])
h, w, colour = frame.shape

df2['class'] = 0
df2['x_center'] = (df2['xmin'] +  (df2['xmax'] - df2['xmin']) / 2 ) / w
df2['y_center'] = (df2['ymin'] +  (df2['ymax'] - df2['ymin']) / 2 ) / h
df2['width'] = (df2['xmax'] - df2['xmin']) / w
df2['height'] = (df2['ymax'] - df2['ymin']) / h

df2 = df2.drop(['xmin', 'ymin', 'xmax', 'ymax'], axis = 1)

df2.head()

Unnamed: 0,image,class,x_center,y_center,width,height
0,vid_4_1000_dg.jpg,0,0.637188,0.627498,0.076555,0.106065
1,vid_4_10000_dg.jpg,0,0.31961,0.872251,0.188432,0.157443
2,vid_4_10040_dg.jpg,0,0.233681,0.342556,0.200586,0.17341
3,vid_4_10020_dg.jpg,0,0.752053,0.430424,0.202994,0.169306
4,vid_4_10060_dg.jpg,0,0.020266,0.313999,0.040533,0.102297


In [49]:
#Save images and labels - one text file per image
for im in dg_imgs:
  frame = cv2.imread('augmented_training_images/{}'.format(im))
  df_image = df2[df2['image'] == im]
  df_image = df_image.drop(['image'], axis = 1)
  df_image.head()
  lbl = im[0:-3] + 'txt'
  #print(lbl)
  
  if im in val:    
    cv2.imwrite('augmented_processed_data/yolo/valid/{}'.format(im), frame)
    df_image.to_csv('augmented_processed_data/yolo/valid/{}'.format(lbl), 
                    header=None, index=None, sep=' ', mode='a')
  else:
    cv2.imwrite('augmented_processed_data/yolo/train/{}'.format(im), frame)
    df_image.to_csv('augmented_processed_data/yolo/train/{}'.format(lbl), 
                    header=None, index=None, sep=' ', mode='a')

### All processed image files in YOLO format are saved inside the augmented_processed_data folder