<a href="https://colab.research.google.com/github/colurw/data_augmentation/blob/master/PascalVOC_Tiling_and_Augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Split JPGs into numbered tiles
In order to reduce RAM allocation during training

In [None]:
import os
import cv2
import math

# Cut every .JPG in PATH into a grid of tiles and write them to 'tiled output',
# naming each tile <original stem>__<row>_<col>.JPG.
TILE_SIZE = (2240, 2240)   # (x,y) width and height of one tile, in pixels
OFFSET = (2240, 2240)      # (x,y) step between tile origins; equal to TILE_SIZE means no overlap
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition/Mask_RCNN/skin data'

# create folder to receive output
os.chdir(PATH)
if not os.path.exists('tiled output'):
    os.mkdir('tiled output')
output_dir = os.path.join(PATH, 'tiled output')

# iterate through training photos
for filename in os.listdir(PATH):
    if not filename.endswith('.JPG'):
        continue

    # cv2.imread signals an unreadable file by returning None instead of raising
    img = cv2.imread(os.path.join(PATH, filename))
    if img is None:
        print('skipping unreadable image: ' + filename)
        continue

    # numpy image shape is (y,x,channels)
    img_shape = img.shape
    img_height, img_width = img_shape[0], img_shape[1]
    stem = filename[:-4]

    # number of tile rows / columns needed to cover the whole photo
    rows = int(math.ceil(img_height / OFFSET[1]))
    cols = int(math.ceil(img_width / OFFSET[0]))

    for i in range(rows):
        for j in range(cols):
            # tile bounds, clipped so edge tiles never run past the photo
            x_min = OFFSET[0] * j
            y_min = OFFSET[1] * i
            x_max = min(x_min + TILE_SIZE[0], img_width)
            y_max = min(y_min + TILE_SIZE[1], img_height)
            cropped_img = img[y_min:y_max, x_min:x_max]

            tile_name = stem + '__' + str(i) + '_' + str(j) + '.JPG'
            print('relative coordinates: (', '%4.f' % x_min, ', %4.f' % y_min, ')   filename: ' + tile_name)

            # save image tiles to output folder using original filename and tile reference
            cv2.imwrite(os.path.join(output_dir, tile_name), cropped_img)
            

# Split XML label files into numbered tiles
To match tiled images.  Appends object count in each class to the filename for easy sorting later

In [None]:
import os
import xml.etree.ElementTree as ET
import math

# For every PascalVOC .xml label file in PATH, write one label file per image
# tile into 'tiled output'. Each tile file keeps only the objects whose bounding
# box lies entirely inside the tile, with coordinates made tile-relative, and
# carries the per-class object counts in its filename:
#   <stem>__<row>_<col>__<follicular_bump count>_<pre_erupted count>.xml
IMG_SHAPE = (4480, 6720)    # (y,x)!
TILE_SIZE = (2240, 2240)    # (x,y)
OFFSET = (2240, 2240)       # (x,y)
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition/Mask_RCNN/skin data'

# create folder to receive output
os.chdir(PATH)
if not os.path.exists('tiled output'):
    os.mkdir('tiled output')

# tile grid is the same for every file because IMG_SHAPE is fixed
rows = int(math.ceil(IMG_SHAPE[0] / OFFSET[1]))
cols = int(math.ceil(IMG_SHAPE[1] / OFFSET[0]))

# iterate through xml files
for filename in os.listdir(PATH):
    if not filename.endswith('.xml'):
        continue
    fullname = os.path.join(PATH, filename)
    stem = filename[:-4]

    # iterate through tile boundaries
    for i in range(rows):
        for j in range(cols):
            x_min = OFFSET[0] * j
            y_min = OFFSET[1] * i
            x_max = min(x_min + TILE_SIZE[0], IMG_SHAPE[1])
            y_max = min(y_min + TILE_SIZE[1], IMG_SHAPE[0])
            tile_ref = '__' + str(i) + '_' + str(j)
            print('tile relative coordinates: (', '%4.f' % x_min, ', %4.f' % y_min, ')   filename: ' + str(stem) + "_" + str(i) + '_' + str(j) + '_...' )

            # reload parent xml file, since the previous tile's edits mutated the tree
            tree = ET.parse(fullname)
            root = tree.getroot()

            # keep objects fully inside the tile (shifted to tile-relative
            # coordinates); drop everything else. findall returns a list, so
            # removing from root while looping over it is safe.
            kept_names = []
            for obj in root.findall('object'):
                bndbox = obj.find('bndbox')
                corners = {tag: bndbox.find(tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')}
                values = {tag: int(element.text) for tag, element in corners.items()}
                inside = (values['xmin'] >= x_min and values['xmax'] <= x_max and
                          values['ymin'] >= y_min and values['ymax'] <= y_max)
                if not inside:
                    root.remove(obj)
                    continue
                corners['xmin'].text = str(values['xmin'] - x_min)
                corners['xmax'].text = str(values['xmax'] - x_min)
                corners['ymin'].text = str(values['ymin'] - y_min)
                corners['ymax'].text = str(values['ymax'] - y_min)
                kept_names.append(obj.findtext('name'))

            # count remaining objects per class
            count_fb = kept_names.count('follicular_bump')
            count_pe = kept_names.count('pre_erupted')

            # update linked JPG references; the tag is lowercase 'path', as
            # read by the mirroring cells ('PATH' matches nothing)
            for tag in ('filename', 'path'):
                element = root.find(tag)
                if element is not None and element.text:
                    element.text = element.text[:-4] + tile_ref + '.JPG'

            # update width & height references with the real tile extent, so
            # clipped edge tiles are described correctly
            size = root.find('size')
            size.find('width').text = str(x_max - x_min)
            size.find('height').text = str(y_max - y_min)

            # save xml tiles to output folder using original filename and tile reference
            out_name = stem + tile_ref + '__' + str(count_fb).zfill(2) + '_' + str(count_pe).zfill(2) + '.xml'
            tree.write(os.path.join(PATH, 'tiled output', out_name))
            

# Remove JPG / XML tiles without objects
Based on tags in the XML file name


In [None]:
import os
import shutil

# Move every tile whose label file reports zero objects (filename ends in
# '00_00.xml'), together with its image, from SOURCE into 'unlabelled output'.
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition/Mask_RCNN/skin data'
SOURCE = 'tiled output'

# create folder to receive output
os.chdir(PATH)
if not os.path.exists('unlabelled output'):
    os.mkdir('unlabelled output')

# iterate through xml files
for xml_name in os.listdir(SOURCE):
    if not xml_name.endswith('.xml'):
        continue

    # the five characters before '.xml' hold the two per-class object counts
    if xml_name[-9:-4] != '00_00':
        continue

    # dropping the trailing 11 characters ('__NN_NN.xml') leaves the tile image stem
    jpg_name = xml_name[:-11] + '.JPG'

    # xml file contains no objects: send it and the matching jpg to the output folder
    print(xml_name)
    for tile_file in (xml_name, jpg_name):
        shutil.move(os.path.join(SOURCE, tile_file), 'unlabelled output')

# Mirror JPGs on YY axis
To double the training data

In [None]:
import os
import cv2

# Write a left-right mirrored copy of every .JPG tile in PATH, saved alongside
# the original with a 'YY_' filename prefix.
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output'

# os.listdir snapshots the folder, so the YY_ copies written below are not revisited
for tile_name in os.listdir(PATH):
    if tile_name.endswith('.JPG'):
        source_img = cv2.imread(PATH + '/' + tile_name)
        # flip code 1 mirrors about the vertical axis
        cv2.imwrite(PATH + '/YY_' + tile_name, cv2.flip(source_img, 1))

print('finished')

# Mirror XML label files on YY axis

In [None]:
import os
import xml.etree.ElementTree as ET

# Write a left-right mirrored copy of every .xml label file in PATH, saved
# alongside the original with a 'YY_' filename prefix. The linked image
# references gain the same prefix and each box's x-range is reflected about the
# image width.
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output'

# iterate through xml files in folder
for filename in os.listdir(PATH):
    if not filename.endswith('.xml'):
        continue
    fullname = os.path.join(PATH, filename)
    tree = ET.parse(fullname)
    root = tree.getroot()

    # add YY prefix to linked jpg reference
    filename_ = root.find('filename')
    newfn = 'YY_' + str(filename_.text)
    filename_.text = newfn
    print(newfn)

    # point the linked jpg path at the prefixed image: keep everything up to
    # the last path separator (either style) and swap in the new image name,
    # rather than relying on a fixed directory-prefix length
    path_ = root.find('path')
    if path_ is not None and path_.text:
        cut = max(path_.text.rfind('/'), path_.text.rfind('\\')) + 1
        path_.text = path_.text[:cut] + newfn

    # get image width by tag name rather than by child position
    imagewidth = int(root.find('size').find('width').text)
    print('width:', imagewidth)

    # iterate through all object bounding boxes
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        Xmin_ = bndbox.find('xmin')
        Xmax_ = bndbox.find('xmax')

        # reflecting swaps the roles of the two edges: old xmax becomes new xmin
        Xmin_1 = imagewidth - int(Xmax_.text)
        Xmax_1 = imagewidth - int(Xmin_.text)
        print(Xmin_.text, Xmax_.text, ' >>> ', Xmin_1, Xmax_1)
        Xmin_.text = str(Xmin_1)
        Xmax_.text = str(Xmax_1)

    # save xml file with YY prefix
    tree.write(PATH + '/YY_' + str(filename))

# Mirror JPGs on XX axis
To quadruple the training data

In [None]:
import os
import cv2

# Write an upside-down mirrored copy of every .JPG tile in PATH, saved
# alongside the original with an 'XX_' filename prefix.
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output'

# os.listdir snapshots the folder, so the XX_ copies written below are not revisited
for tile_name in os.listdir(PATH):
    if tile_name.endswith('.JPG'):
        source_img = cv2.imread(PATH + '/' + tile_name)
        # flip code 0 mirrors about the horizontal axis
        cv2.imwrite(PATH + '/XX_' + tile_name, cv2.flip(source_img, 0))

print('finished')

# Mirror XML label files on XX axis

In [None]:
import os
import xml.etree.ElementTree as ET

# Write a top-bottom mirrored copy of every .xml label file in PATH, saved
# alongside the original with an 'XX_' filename prefix. The linked image
# references gain the same prefix and each box's y-range is reflected about the
# image height.
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output'

# iterate through xml files in folder
for filename in os.listdir(PATH):
    if not filename.endswith('.xml'):
        continue
    fullname = os.path.join(PATH, filename)
    tree = ET.parse(fullname)
    root = tree.getroot()

    # add XX prefix to linked jpg reference
    filename_ = root.find('filename')
    newfn = 'XX_' + str(filename_.text)
    filename_.text = newfn
    print(newfn)

    # point the linked jpg path at the prefixed image: keep everything up to
    # the last path separator (either style) and swap in the new image name,
    # rather than trimming a fixed number of characters
    path_ = root.find('path')
    if path_ is not None and path_.text:
        cut = max(path_.text.rfind('/'), path_.text.rfind('\\')) + 1
        path_.text = path_.text[:cut] + newfn

    # get image height by tag name rather than by child position
    imageheight = int(root.find('size').find('height').text)
    print('height:', imageheight)

    # iterate through all object bounding boxes
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        Ymin_ = bndbox.find('ymin')
        Ymax_ = bndbox.find('ymax')

        # reflecting swaps the roles of the two edges: old ymax becomes new ymin
        Ymin_1 = imageheight - int(Ymax_.text)
        Ymax_1 = imageheight - int(Ymin_.text)
        print(Ymin_.text, Ymax_.text, ' >>> ', Ymin_1, Ymax_1)
        Ymin_.text = str(Ymin_1)
        Ymax_.text = str(Ymax_1)

    # save xml file with XX prefix
    tree.write(PATH + '/XX_' + str(filename))

# Remove object tags from XML filenames

In [None]:
import os
# Rename every .xml file in `path` by dropping its last 11 characters (the
# object-count tag plus extension) and putting '.xml' back on.
path = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output'

for oldname in os.listdir(path):
    if not oldname.endswith('.xml'):
        continue
    newname = oldname[:-11] + '.xml'
    source = os.path.join(path, oldname)
    target = os.path.join(path, newname)
    os.rename(source, target)

print('finished')

# Set aside some JPG / XML pairs for test dataset 

In [None]:
import os
import shutil
from random import seed
from random import randint

# Ask how many labelled tiles to hold back, then move that many randomly chosen
# xml/jpg pairs from SOURCE into 'validation set'.
from random import sample

PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition/Mask_RCNN/skin data'
SOURCE = 'tiled output'

# count file pairs in folder (one xml per pair)
os.chdir(PATH)
xml_files = [name for name in os.listdir(SOURCE) if name.endswith('.xml')]
file_num = len(xml_files)

# choose how many to set aside
print('Number of labelled image files in folder:', file_num)
print('How many to move to validation dataset ?')
files_to_set_aside = input()

# clamp the request to what exists, so the selection can neither overshoot nor
# loop forever waiting for files that are not there
requested = max(0, min(int(files_to_set_aside), file_num))

# create folder to receive selected files
if not os.path.exists('validation set'):
    os.mkdir('validation set')

# pick exactly `requested` distinct xml files at random
count = 0
for filename in sample(xml_files, requested):
    # NOTE(review): assumes xml names still end in the 11-character
    # '__NN_NN.xml' object-count tag; confirm this cell runs before the tags
    # are stripped from the filenames
    img_name = filename[:-11] + '.JPG'
    print(img_name)
    print(filename, '\n')

    # move chosen xml/jpg file pair to new folder
    shutil.move(os.path.join(SOURCE, filename), 'validation set')
    shutil.move(os.path.join(SOURCE, img_name), 'validation set')
    count += 1
                

# Rotate JPGs clockwise
Doubles training data for each ANGLE entered.

In [None]:
from PIL import Image
import os

# Rotate every .JPG tile in PATH clockwise by ANGLE degrees and save the result
# into the 'rotated' subfolder as CW<ANGLE>__<original name>.
ANGLE = 10   # must be positive
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output'

# create folder to receive files
os.chdir(PATH)
if not os.path.exists('rotated'):
    os.mkdir('rotated')

# iterate through images
for filename in os.listdir(PATH):
    if not filename.endswith('.JPG'):
        continue
    fullname = os.path.join(PATH, filename)
    image = Image.open(fullname)

    # PIL rotates counter-clockwise for positive angles, hence the negation;
    # expand=False keeps the canvas at its original size
    output = image.rotate(-ANGLE, resample=3, expand=False)
    print(image.size, '>>>', output.size)

    # save image with CW_ANGLE prefixed to filename
    output.save(PATH + '/rotated/CW' + str(ANGLE) + '__' + str(filename))

# Rotate XML label files clockwise

In [None]:
import os
import xml.etree.ElementTree as ET
import math

# For every .xml label file in PATH, rotate each bounding-box centre clockwise
# by ANGLE degrees about the image centre, rebuild an axis-aligned box of the
# original size around it, and save the result into the 'rotated' subfolder as
# CW<ANGLE>__<original name>.
ANGLE = 10                # must be positive
EXPANDED_PHOTO = False    # should match expand= arg in image.rotate() in previous cell
CIRCULAR_OBJECTS = True   # when False (and EXPANDED_PHOTO is True) bounding boxes are enlarged
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output'

# create folder to receive files
os.chdir(PATH)
if not os.path.exists('rotated'):
    os.mkdir('rotated')

# rotation angle is the same for every file; use full-precision pi
theta = math.radians(ANGLE)
cos_t = math.cos(theta)
sin_t = math.sin(theta)

# iterate through xml files
for filename in os.listdir(PATH):
    if not filename.endswith('.xml'):
        continue
    fullname = os.path.join(PATH, filename)
    tree = ET.parse(fullname)
    root = tree.getroot()

    # update filename reference
    filename_ = root.find('filename')
    newfn = 'CW' + str(ANGLE) + '__' + str(filename_.text)
    filename_.text = newfn
    print(newfn)

    # read image size by tag name and calculate centre of rotation
    size = root.find('size')
    _width = size.find('width')
    _height = size.find('height')
    imagewidth = int(_width.text)
    imageheight = int(_height.text)
    rotation_centre = imagewidth / 2, imageheight / 2
    ox, oy = rotation_centre

    if EXPANDED_PHOTO:
        # calculate size of expanded image
        expanded_image_dims = (math.ceil(cos_t * imagewidth + sin_t * imageheight),
                               math.ceil(sin_t * imagewidth + cos_t * imageheight))

        # expand bounding box to avoid clipping rotated objects
        if CIRCULAR_OBJECTS:
            expansion_factor = 1
        else:
            expansion_factor = math.ceil(expanded_image_dims[0] / imagewidth)

        # update width & height references
        _width.text = str(expanded_image_dims[0])
        _height.text = str(expanded_image_dims[1])

        # calculate remapping vector (shift from old canvas origin to new one)
        remap_vector = (math.ceil(expanded_image_dims[0] / 2 - imagewidth / 2),
                        math.ceil(expanded_image_dims[1] / 2 - imageheight / 2))
    else:
        remap_vector = (0, 0)
        expansion_factor = 1

    # findall returns a list, so objects can be removed from root inside the loop
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        Xmin_ = bndbox.find('xmin')
        Ymin_ = bndbox.find('ymin')
        Xmax_ = bndbox.find('xmax')
        Ymax_ = bndbox.find('ymax')
        Xmin = int(Xmin_.text)
        Ymin = int(Ymin_.text)
        Xmax = int(Xmax_.text)
        Ymax = int(Ymax_.text)

        # calculate centre of object, relative to the rotation centre
        dx = (Xmax / 2 + Xmin / 2) - ox
        dy = (Ymax / 2 + Ymin / 2) - oy

        # apply rotation and remap to expanded image
        Xobjcentr1 = cos_t * dx - sin_t * dy + ox + remap_vector[0]
        Yobjcentr1 = sin_t * dx + cos_t * dy + oy + remap_vector[1]

        # rebuild bounding box around the rotated centre
        half_w = (Xmax - Xmin) / 2 * expansion_factor
        half_h = (Ymax - Ymin) / 2 * expansion_factor
        Xmin1 = math.ceil(Xobjcentr1 - half_w)
        Ymin1 = math.ceil(Yobjcentr1 - half_h)
        Xmax1 = math.ceil(Xobjcentr1 + half_w)
        Ymax1 = math.ceil(Yobjcentr1 + half_h)

        # remove out-of-bounds objects: x is limited by the image width and
        # y by the image height
        if not EXPANDED_PHOTO and (Xmin1 < 0 or Xmax1 > imagewidth or
                                   Ymin1 < 0 or Ymax1 > imageheight):
            root.remove(obj)
            continue

        # overwrite old bounding box
        Xmin_.text = str(Xmin1)
        Ymin_.text = str(Ymin1)
        Xmax_.text = str(Xmax1)
        Ymax_.text = str(Ymax1)

    # save copy of rotated xml file with CW_ANGLE prefixed to filename
    tree.write(PATH + '/rotated/CW' + str(ANGLE) + '__' + str(filename))

# Mirror rotated JPGs on XX axis
Gives equivalent anti-clockwise rotation


In [None]:
import os
import cv2

# Write a top-bottom mirrored copy of every rotated .JPG in PATH, saved in the
# same folder with 'A' prepended to the filename (CW... becomes ACW...).
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output/rotated'

# iterate through rotated images
for filename in os.listdir(PATH):
    if not filename.endswith('.JPG'):
        continue

    # apply XX flip, save and prefix filename with ACW
    img = cv2.imread(PATH + '/' + filename)
    img_flip_ud = cv2.flip(img, 0)
    # write next to the source file; this cell's folder constant is PATH
    cv2.imwrite(PATH + '/A' + str(filename), img_flip_ud)

print('finished')

# Mirror rotated XML label files on XX axis

In [None]:
import os
import xml.etree.ElementTree as ET

# Write a top-bottom mirrored copy of every rotated .xml label file in PATH,
# saved in the same folder with 'A' prepended to the filename (CW... becomes
# ACW...). The linked image references gain the same prefix and each box's
# y-range is reflected about the image height.
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output/rotated'

# iterate through xml files
for filename in os.listdir(PATH):
    if not filename.endswith('.xml'):
        continue
    fullname = os.path.join(PATH, filename)
    tree = ET.parse(fullname)
    root = tree.getroot()

    # add ACW prefix to linked jpg reference
    filename_ = root.find('filename')
    newfn = 'A' + str(filename_.text)
    filename_.text = newfn
    print(newfn)

    # point the linked jpg path at the prefixed image: keep everything up to
    # the last path separator (either style) and swap in the new image name,
    # rather than trimming a fixed number of characters
    path_ = root.find('path')
    if path_ is not None and path_.text:
        cut = max(path_.text.rfind('/'), path_.text.rfind('\\')) + 1
        path_.text = path_.text[:cut] + newfn

    # get image height by tag name rather than by child position
    imageheight = int(root.find('size').find('height').text)
    print('height:', imageheight)

    # iterate object bounding boxes and apply XX flip
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        Ymin_ = bndbox.find('ymin')
        Ymax_ = bndbox.find('ymax')
        # reflecting swaps the roles of the two edges: old ymax becomes new ymin
        Ymin_1 = imageheight - int(Ymax_.text)
        Ymax_1 = imageheight - int(Ymin_.text)
        print(Ymin_.text, Ymax_.text, ' >>> ', Ymin_1, Ymax_1)
        Ymin_.text = str(Ymin_1)
        Ymax_.text = str(Ymax_1)

    # save xml file with ACW prefix to filename, next to the source file
    tree.write(PATH + '/A' + str(filename))

# Move rotated files to same folder as non-rotated




In [None]:
import os
import shutil

# Move everything from the 'rotated' subfolder up into PATH itself.
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output'
SOURCE = 'rotated'

# SOURCE is relative, so work from inside PATH
os.chdir(PATH)
rotated_entries = os.listdir(SOURCE)
for entry in rotated_entries:
    origin = os.path.join(SOURCE, entry)
    shutil.move(origin, PATH)

print('finished')

# Renumber file pairs

In [None]:
import os

# Rename every xml/jpg pair in PATH to a zero-padded running number
# (00001.xml / 00001.JPG, 00002.xml / 00002.JPG, ...).
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition//skin data/tiled output'

# os.listdir order is arbitrary; sorting makes the numbering reproducible
xml_names = sorted(name for name in os.listdir(PATH) if name.endswith('.xml'))

# iterate through xml files
count = 1
for name in xml_names:

    # rename file in format 00001.xml
    new_name = str(count).zfill(5) + '.xml'
    os.rename(os.path.join(PATH, name), os.path.join(PATH, new_name))

    # find matching jpg file and give matching name 00001.jpg etc
    imagename = name[:-4] + '.JPG'
    new_imagename = str(count).zfill(5) + '.JPG'
    os.rename(os.path.join(PATH, imagename), os.path.join(PATH, new_imagename))
    count += 1

print('finished')

# Move JPGs and XMLs to separate folders

In [None]:
import os
import shutil

# Sort the contents of SOURCE by type: .xml files go to ANN_DEST and .JPG
# files go to IM_DEST, both directly under PATH.
PATH = '/content/drive/My Drive/Colab Notebooks/HD_object_recognition/Mask_RCNN/skin data'
SOURCE = 'tiled output'
ANN_DEST = 'annots'
IM_DEST = 'images'

# create folders to receive files
os.chdir(PATH)
for folder in (IM_DEST, ANN_DEST):
    if not os.path.exists(folder):
        os.mkdir(folder)

# move files: all annotations first, then all images, re-listing SOURCE each pass
for suffix, destination in (('.xml', ANN_DEST), ('.JPG', IM_DEST)):
    for filename in os.listdir(SOURCE):
        if filename.endswith(suffix):
            shutil.move(os.path.join(SOURCE, filename), os.path.join(PATH, destination))

print('finished')