<a href="https://colab.research.google.com/github/ezeguins/NER/blob/main/augmentation_ts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DATA AUGMENTATION FOR BOUNDING BOX DETECTION DATASET
- Load dataset file from gdrive.
- Divide it into three equal parts and rotate the images and bounding boxes by 90, 180 and 270 degrees.
- Save the rotated data and annotations to gdrive with their new filenames.



In [None]:
import os
from google.colab import drive

# Attach Google Drive to this runtime; the dataset paths used below live under this mount point.
gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

Mounted at /content/gdrive


In [None]:
!pip install imgaug &> /dev/null
from imgaug import augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
import numpy as np
import cv2
from google.colab.patches import cv2_imshow

In [None]:
# LOAD ORIGINAL DATASET FILE
#
# The archive stores three positional arrays: images (arr_0), bounding boxes
# (arr_1) and file names (arr_2).

outfile = "/content/gdrive/MyDrive/VGG16/dataset/datos.npz"

# np.load on an .npz returns a lazy NpzFile that keeps the file open. Indexing it
# materialises each array in memory, so the handle can be closed right after
# instead of leaking for the lifetime of the kernel.
with np.load(outfile) as npzfile:
  data1 = npzfile['arr_0']
  targets1 = npzfile['arr_1']
  filenames1 = npzfile['arr_2']

n = len(data1)

# Every image needs exactly one box row and one file name; fail early otherwise.
assert len(targets1) == n and len(filenames1) == n, "dataset arrays have mismatched lengths"


In [None]:
# Sanity check: the three arrays should report the same number of samples.
print(*map(len, (data1, targets1, filenames1)))

857 857 857


In [None]:
# DATA SHUFFLING

from sklearn.utils import shuffle

# A single call shuffles the three arrays together; the fixed random_state makes
# the resulting order repeatable from a fresh load.
shuffled_arrays = shuffle(data1, targets1, filenames1, random_state=0)
data1, targets1, filenames1 = shuffled_arrays


In [None]:
# THREE EQUAL PARTS: 90, 270 AND 180 DEGREES ROTATION
#
# The (shuffled) dataset is cut into three consecutive thirds. Each third is
# rotated by a different angle together with its bounding boxes.

PIXEL_SCALE = 255  # images are multiplied by this before augmentation and divided by it again afterwards
BOX_SCALE = 224    # same for box coordinates; presumably the image side length in pixels -- confirm


def rotate_split(images, boxes, angle):
  """Rotate a batch of images and their bounding boxes by a fixed angle.

  Parameters
  ----------
  images : ndarray
      Slice of ``data1``; multiplied by ``PIXEL_SCALE`` before augmenting.
  boxes : ndarray
      Matching slice of ``targets1``, one ``[x1, y1, x2, y2]`` row per image;
      multiplied by ``BOX_SCALE`` before augmenting.
  angle : int or float
      Rotation in degrees, forwarded to ``iaa.Affine(rotate=...)``.

  Returns
  -------
  tuple
      ``(rotated_images, rotated_boxes)`` where ``rotated_boxes`` is a list of
      ``[x1, y1, x2, y2]`` lists in the scaled (pre-division) coordinates.
  """
  scaled_images = images * PIXEL_SCALE
  scaled_boxes = boxes * BOX_SCALE
  image_shape = scaled_images.shape[1:]

  # One container per image: each box belongs to its own image, so it is
  # paired with that image explicitly instead of sharing a single container.
  boxes_per_image = [
      BoundingBoxesOnImage.from_xyxy_array(box.reshape(1, 4), shape=image_shape)
      for box in scaled_boxes
  ]

  augmenter = iaa.Sequential([iaa.Affine(rotate=angle)])
  rotated_images, rotated_containers = augmenter(
      images=scaled_images, bounding_boxes=boxes_per_image)

  rotated_boxes = [
      [bb.x1, bb.y1, bb.x2, bb.y2]
      for container in rotated_containers
      for bb in container.bounding_boxes
  ]
  return rotated_images, rotated_boxes


# Integer split points for the three thirds.
first_cut = n // 3
second_cut = 2 * n // 3

data_rotated90, targets_rotated90 = rotate_split(
    data1[:first_cut], targets1[:first_cut], 90)

# A rotation of -90 degrees is what the rest of the notebook labels as "270".
data_rotated270, targets_rotated270 = rotate_split(
    data1[first_cut:second_cut], targets1[first_cut:second_cut], -90)

data_rotated180, targets_rotated180 = rotate_split(
    data1[second_cut:], targets1[second_cut:], 180)

# Every input sample must come back with exactly one rotated box.
assert (len(targets_rotated90) + len(targets_rotated270) + len(targets_rotated180)) == n


In [None]:
# IMAGES VERIFICATION
#
# Visual spot check of the rotated boxes. This reads the per-rotation outputs
# of the rotation cell directly, so it works in top-to-bottom execution order.
# Boxes are drawn on a uint8 *copy* of each image: cv2.rectangle paints in
# place, and the rotated arrays are saved to the dataset later on.

PREVIEW_PER_ROTATION = 5  # images shown per rotation angle; set to None to show all

rotated_splits = (
    (data_rotated90, targets_rotated90),
    (data_rotated270, targets_rotated270),
    (data_rotated180, targets_rotated180),
)

for images, boxes in rotated_splits:
  for image, box in zip(images[:PREVIEW_PER_ROTATION], boxes[:PREVIEW_PER_ROTATION]):
    startX, startY, endX, endY = (int(coord) for coord in box[:4])
    print(startX, startY, endX, endY)
    # Fresh uint8 canvas: leaves the dataset untouched and matches the
    # clipping/casting cv2_imshow applies for display.
    canvas = np.clip(image, 0, 255).astype(np.uint8)
    cv2.rectangle(canvas, (startX, startY), (endX, endY), (0, 255, 0), 2)
    # show the output image
    cv2_imshow(canvas)

In [None]:
# FULL DATASET SAVING (ORIGINAL + ROTATED)

# FILE NAMES: every rotated sample reuses its source file name with a "SHU_" prefix.
filenames1_rotated = [f"SHU_{name}" for name in filenames1]

# ROTATED DATASET CONCATENATION
# Batches are joined in split order (90, 270, 180), the same order as the
# slices taken from data1, so row i still matches filenames1[i].
data1_rotated = np.concatenate([data_rotated90, data_rotated270, data_rotated180], axis=0)
targets1_rotated = [*targets_rotated90, *targets_rotated270, *targets_rotated180]



In [None]:
# Undo the scaling applied before augmentation (x255 for pixels, x224 for box
# coordinates) and turn everything into NumPy arrays.
data_rotated = np.asarray(data1_rotated, dtype=np.float32) / 255
targets_rotated = np.asarray(targets1_rotated, dtype=np.float32) / 224
filenames_rotated = np.asarray(filenames1_rotated)

# Confirm the container type of each result, one per line.
print(*map(type, (data_rotated, targets_rotated, filenames_rotated)), sep="\n")


In [None]:
# Build the final dataset: original samples first, rotated copies appended after them.
data_full, targets_full, filenames_full = (
    np.concatenate(pair, axis=0)
    for pair in (
        (data1, data_rotated),
        (targets1, targets_rotated),
        (filenames1, filenames_rotated),
    )
)
print(*map(len, (data_full, targets_full, filenames_full)))


In [None]:
# DATA SET RECORD TO CURRENT GOOGLE DRIVE

outfile_full = "/content/gdrive/MyDrive/VGG16/dataset/datos_full.npz"

# Index bounds of the saved dataset; the total sample count is printed for reference.
inicio, fin = 0, len(data_full)
print(fin)

# Arrays are passed positionally, so they are stored as arr_0 (images),
# arr_1 (boxes) and arr_2 (file names) -- the same keys the loading cell reads.
np.savez(outfile_full, data_full, targets_full, filenames_full)