<a href="https://colab.research.google.com/github/colurw/data_augmentation/blob/master/PascalVOC_Tiling_and_Augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Split JPGs into numbered tiles

In [0]:
import os
import cv2
import math

## Cut every .JPG in `path` into a grid of tiles and save them to 'tiled output'.
## Tiles are named '<original stem>__<row>_<col>.JPG'.
tile_size = (2240, 2240)   ## (x,y) size of each tile in pixels
offset = (2240, 2240)      ## (x,y) step between tile origins
# tile_size = (3360, 2240)
# offset = (3360, 2240)    

path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names'
out_dir = os.path.join(path, 'tiled output')

os.chdir(path)
os.makedirs(out_dir, exist_ok=True)

for filename in os.listdir(path):
    if not filename.endswith('.JPG'): continue

    img = cv2.imread(os.path.join(path, filename))  ## 6720, 4480
    if img is None:
        ## cv2.imread signals an unreadable file by returning None, not by raising
        print('skipping unreadable image: ' + str(filename))
        continue
    img_shape = img.shape  ## 4480, 6720 (y,x)!

    stem = filename[:-4]
    n_rows = int(math.ceil(img_shape[0] / (offset[1] * 1.0)))
    n_cols = int(math.ceil(img_shape[1] / (offset[0] * 1.0)))

    for i in range(n_rows):
        for j in range(n_cols):
            ## tile boundaries, clipped to the image so edge tiles may be smaller
            X_MIN = offset[0] * j
            Y_MIN = offset[1] * i
            X_MAX = min(X_MIN + tile_size[0], img_shape[1])
            Y_MAX = min(Y_MIN + tile_size[1], img_shape[0])
            cropped_img = img[Y_MIN:Y_MAX, X_MIN:X_MAX]  ## array is indexed [y, x]

            tile_name = str(stem) + '__' + str(i) + '_' + str(j) + '.JPG'
            print('relative coordinates: (', '%4.f' % X_MIN, ', %4.f' % Y_MIN, ')   filename: '  + tile_name)
            cv2.imwrite(os.path.join(out_dir, tile_name), cropped_img)

# Split Pascal VOC XML files into numbered tiles
Appends the number of objects in each class to the filename, for easy sorting later.

In [0]:
import os
import xml.etree.ElementTree as ET
import math

## Split every Pascal VOC .xml in `path` into one annotation file per tile.
## Only objects whose bounding box lies entirely inside a tile are kept, and their
## coordinates are made relative to the tile origin. Output files are named
## '<original stem>__<row>_<col>__<count_fb>_<count_pe>.xml'.
img_shape = (4480, 6720)    ## (y,x)!  ## if images are different sizes, parse xml root for 'height' and 'width'. 
# tile_size = (1680, 1494)  ## (x,y)   ## try: img_shape = (int(root[4][1].text), int(root[4][0].text))
# offset = (1680, 1495)     ## (x,y)
tile_size = (2240, 2240)    ## (x,y)
offset = (2240, 2240)       ## (x,y)
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names'
out_dir = os.path.join(path, 'tiled output')

os.chdir(path)
os.makedirs(out_dir, exist_ok=True)

## the grid depends only on the settings above, so compute it once
n_rows = int(math.ceil(img_shape[0] / (offset[1] * 1.0)))
n_cols = int(math.ceil(img_shape[1] / (offset[0] * 1.0)))

for filename in os.listdir(path):
    if not filename.endswith('.xml'): continue
    fullname = os.path.join(path, filename)
    stem = filename[:-4]

    for i in range(n_rows):
        for j in range(n_cols):
            tile_tag = "__" + str(i) + "_" + str(j)

            ## calculate tile boundaries (clipped to the image)
            X_MIN = offset[0]*j
            Y_MIN = offset[1]*i
            X_MAX = min(X_MIN + tile_size[0], img_shape[1])
            Y_MAX = min(Y_MIN + tile_size[1], img_shape[0])
            print('tile relative coordinates: (', '%4.f' % X_MIN, ', %4.f' % Y_MIN, ')   filename: ' + str(stem) + "_" + str(i) + '_' + str(j) + '_...' )

            ## reload parent file, because each tile edits its own fresh copy of the tree
            tree = ET.parse(fullname)
            root = tree.getroot()

            ## remove objects that are not fully inside the tile
            ## (findall returns a list, so removing while looping is safe)
            for obj in root.findall('object'):
                bndbox = obj.find('bndbox')
                inside = (int(bndbox.find('xmin').text) >= X_MIN and
                          int(bndbox.find('xmax').text) <= X_MAX and
                          int(bndbox.find('ymin').text) >= Y_MIN and
                          int(bndbox.find('ymax').text) <= Y_MAX)
                if not inside:
                    root.remove(obj)

            ## count remaining objects per class
            count_fb = len(root.findall("./object[name='follicular_bump']"))
            count_pe = len(root.findall("./object[name='pre_erupted']"))

            ## update linked JPG reference
            _filename = root.find('filename')
            _filename.text = str(_filename.text[:-4] + tile_tag + ".JPG")

            _path = root.find('path')
            if _path is not None and _path.text:   ## <path> may be absent or empty
                _path.text = str(_path.text[:-4] + tile_tag + ".JPG")

            ## update width & height references with the real tile size;
            ## clipped edge tiles are smaller than the nominal tile_size
            size = root.find('size')
            size.find('width').text = str(X_MAX - X_MIN)
            size.find('height').text = str(Y_MAX - Y_MIN)

            ## subtract tile relative coordinates from each bounding box
            for tag, shift in (('xmin', X_MIN), ('ymin', Y_MIN), ('xmax', X_MIN), ('ymax', Y_MIN)):
                for coord in root.iter(tag):
                    coord.text = str(int(coord.text) - shift)

            ## save child files
            tree.write(os.path.join(out_dir, str(stem) + tile_tag + "__" + str(count_fb) + "_" + str(count_pe) + ".xml"))

# Remove JPG / Pascal VOC XML tiles without objects
Unlabelled tiles are identified by the object-count tags in the XML file name.


In [0]:
import os
import shutil

## Move tiles that contain no labelled objects (XML plus matching JPG) from
## 'tiled output' into 'unlabelled output'.
## Expects XML names of the form '<image stem>__<count_fb>_<count_pe>.xml', where the
## image stem is the JPG name without its extension.
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names'
source = 'tiled output'
destination = 'unlabelled output'

os.chdir(path)
os.makedirs(destination, exist_ok=True)

for f in os.listdir(source):
    if not f.endswith('.xml'): continue

    ## split on the last '__' instead of using fixed slice positions, so that
    ## multi-digit counts (e.g. '__10_0') are not mistaken for '0_0'
    img_stem, separator, obj_tags = f[:-4].rpartition('__')
    if not separator or obj_tags != '0_0': continue

    img_name = img_stem + '.JPG'
    shutil.move(os.path.join(source, f), destination)

    if os.path.exists(os.path.join(source, img_name)):
        shutil.move(os.path.join(source, img_name), destination)
    else:
        ## keep going: one missing image should not abort the whole clean-up
        print('no matching image found for ' + f + ' (expected ' + img_name + ')')

# Mirror JPGs on YY axis

In [0]:
import os
import cv2

## Write a left-right mirrored copy of every .JPG in `path`, prefixed with 'YY_'.
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names/tiled output'
jpg_names = [name for name in os.listdir(path) if name.endswith('.JPG')]

for filename in jpg_names:
    source_file = path + '/' + filename
    mirrored_file = path + '/YY_' + str(filename)

    img = cv2.imread(source_file)
    img_flip_lr = cv2.flip(img, 1)   ## flip code 1 mirrors about the vertical axis
    cv2.imwrite(mirrored_file, img_flip_lr)

# Mirror JPGs on XX axis

In [0]:
import os
import cv2

## Write an upside-down mirrored copy of every .JPG in `path`, prefixed with 'XX_'.
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names/tiled output'
jpg_names = [name for name in os.listdir(path) if name.endswith('.JPG')]

for filename in jpg_names:
    source_file = path + '/' + filename
    mirrored_file = path + '/XX_' + str(filename)

    img = cv2.imread(source_file)
    img_flip_ud = cv2.flip(img, 0)   ## flip code 0 mirrors about the horizontal axis
    cv2.imwrite(mirrored_file, img_flip_ud)

# Mirror Pascal VOC XML files on YY axis

In [0]:
import os
import xml.etree.ElementTree as ET

## Write a left-right mirrored copy of every Pascal VOC .xml in `path`, prefixed
## with 'YY_'. The image references are renamed and each box is reflected in x.
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names/tiled output'
for filename in os.listdir(path):
    if not filename.endswith('.xml'): continue
    fullname = os.path.join(path, filename)
    tree = ET.parse(fullname)
    root = tree.getroot()

    ## point the annotation at the mirrored image
    filename_ = root.find('filename')
    newfn = 'YY_'+str(filename_.text)
    filename_.text = str(newfn)
    print(str(newfn))

    ## swap only the file-name part of <path>: keep everything up to the last
    ## separator (either '/' or '\') rather than relying on a fixed character count
    path_ = root.find('path')
    if path_ is not None and path_.text:
        cut = max(path_.text.rfind('/'), path_.text.rfind('\\')) + 1
        newpath = path_.text[:cut] + str(newfn)
        path_.text = str(newpath)
        print(str(newpath))

    ## look the width up by tag name so the order of elements in the file does not matter
    imagewidth = int(root.find('size').find('width').text)
    print('width:',imagewidth)

    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        Xmin_ = bndbox.find('xmin')
        Xmax_ = bndbox.find('xmax')
        ## reflecting swaps the roles of the two edges: the old xmax becomes the new xmin
        Xmin_1 = imagewidth-int(Xmax_.text)
        Xmax_1 = imagewidth-int(Xmin_.text)
        print(Xmin_.text, Xmax_.text,' >>> ', Xmin_1, Xmax_1)
        Xmin_.text = str(Xmin_1)
        Xmax_.text = str(Xmax_1)

    tree.write(path+'/YY_'+str(filename))

# Mirror Pascal VOC XML files on XX axis

In [0]:
import os
import xml.etree.ElementTree as ET

## Write an upside-down mirrored copy of every Pascal VOC .xml in `path`, prefixed
## with 'XX_'. The image references are renamed and each box is reflected in y.
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names/tiled output'
for filename in os.listdir(path):
    if not filename.endswith('.xml'): continue
    fullname = os.path.join(path, filename)
    tree = ET.parse(fullname)
    root = tree.getroot()

    ## point the annotation at the mirrored image
    filename_ = root.find('filename')
    newfn = 'XX_'+str(filename_.text)
    filename_.text = str(newfn)
    print(str(newfn))

    ## swap only the file-name part of <path>: keep everything up to the last
    ## separator (either '/' or '\') rather than assuming a fixed file-name length
    path_ = root.find('path')
    if path_ is not None and path_.text:
        cut = max(path_.text.rfind('/'), path_.text.rfind('\\')) + 1
        newpath = path_.text[:cut] + str(newfn)
        path_.text = str(newpath)
        print(str(newpath))

    ## look the height up by tag name so the order of elements in the file does not matter
    imageheight = int(root.find('size').find('height').text)
    print('height:',imageheight)

    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        Ymin_ = bndbox.find('ymin')
        Ymax_ = bndbox.find('ymax')
        ## reflecting swaps the roles of the two edges: the old ymax becomes the new ymin
        Ymin_1 = imageheight-int(Ymax_.text)
        Ymax_1 = imageheight-int(Ymin_.text)
        print(Ymin_.text, Ymax_.text,' >>> ', Ymin_1, Ymax_1)
        Ymin_.text = str(Ymin_1)
        Ymax_.text = str(Ymax_1)

    tree.write(path+'/XX_'+str(filename))

# Rotate JPGs clockwise

In [0]:
from PIL import Image
import os

## Rotate every .JPG in `path` clockwise by `angle` degrees and save the result
## to the 'rotated' sub-folder as 'CW<angle>__<original name>'.
angle = 10  ## must be positive ##
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names/tiled output'

os.chdir(path)
os.makedirs('rotated', exist_ok=True)

for filename in os.listdir(path):
    if not filename.endswith('.JPG'): continue
    fullname = os.path.join(path, filename)

    ## the context manager closes the source file once the rotated copy exists
    with Image.open(fullname) as image:
        ## PIL treats positive angles as counter-clockwise, hence the minus sign;
        ## resample=3 is PIL's bicubic filter; expand=True grows the canvas to fit
        output = image.rotate(-angle, resample=3, expand=True)
        # display(output)
        print(image.size, '>>>', output.size)

    output.save(path+'/rotated/CW'+str(angle)+'__'+str(filename))

# Rotate Pascal VOC XML files clockwise

In [0]:
import os
import xml.etree.ElementTree as ET
import math

## Rotate the annotations in every Pascal VOC .xml in `path` clockwise by `angle`
## degrees and save them to the 'rotated' sub-folder as 'CW<angle>__<original name>'.
## Each box keeps its size (scaled by expansion_factor) and is re-centred on the
## rotated position of its original centre.
angle = 10               ## must be positive
expanded_photo = True    ## must be True, until out-of-bounds objects are removed ##
circular_objects = True
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names/tiled output'

os.chdir(path)
os.makedirs('rotated', exist_ok=True)

## the rotation is the same for every file and object, so do the trigonometry once
theta = math.radians(angle)
cos_t = math.cos(theta)
sin_t = math.sin(theta)

for filename in os.listdir(path):
    if not filename.endswith('.xml'): continue
    fullname = os.path.join(path, filename)
    tree = ET.parse(fullname)
    root = tree.getroot()

    ## update filename reference
    filename_ = root.find('filename')
    newfn = 'CW'+str(angle)+'__'+str(filename_.text)
    filename_.text = str(newfn)
    print(str(newfn))

    ## calculate centre of rotation (size is looked up by tag name, not by position)
    size = root.find('size')
    _width = size.find('width')
    _height = size.find('height')
    imagewidth = int(_width.text)
    imageheight = int(_height.text)
    ox, oy = imagewidth/2, imageheight/2

    if expanded_photo:
        ## calculate size of expanded images (bounding rectangle of the rotated image)
        expanded_image_dims = math.ceil(cos_t * imagewidth + sin_t * imageheight), \
                              math.ceil(sin_t * imagewidth + cos_t * imageheight)

        ## expand bounding box to avoid clipping rotated objects
        if circular_objects:
            expansion_factor = 1
        else:
            expansion_factor = math.ceil(expanded_image_dims[0] / imagewidth)

        ## update width & height references
        _width.text = str(expanded_image_dims[0])
        _height.text = str(expanded_image_dims[1])

        ## calculate remapping vector: the shift of the old image centre on the expanded canvas
        remap_vector = math.ceil(expanded_image_dims[0]/2 - imagewidth/2), \
                       math.ceil(expanded_image_dims[1]/2 - imageheight/2)
    else:
        remap_vector = (0, 0)
        expansion_factor = 1

    ## get box coordinates for each object
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        Xmin_ = bndbox.find('xmin')
        Ymin_ = bndbox.find('ymin')
        Xmax_ = bndbox.find('xmax')
        Ymax_ = bndbox.find('ymax')
        Xmin, Ymin = int(Xmin_.text), int(Ymin_.text)
        Xmax, Ymax = int(Xmax_.text), int(Ymax_.text)

        ## centre of object, measured from the centre of rotation
        dx = (Xmax/2 + Xmin/2) - ox
        dy = (Ymax/2 + Ymin/2) - oy

        ## apply rotation and remap to expanded image
        ## (y grows downwards in image coordinates, so this matrix turns clockwise)
        Xobjcentr1 = cos_t * dx - sin_t * dy + ox + remap_vector[0]
        Yobjcentr1 = sin_t * dx + cos_t * dy + oy + remap_vector[1]

        ## rebuild bounding box around the rotated centre
        half_w = (Xmax - Xmin)/2 * expansion_factor
        half_h = (Ymax - Ymin)/2 * expansion_factor

        ## overwrite bounding box
        Xmin_.text = str(math.ceil(Xobjcentr1 - half_w))
        Ymin_.text = str(math.ceil(Yobjcentr1 - half_h))
        Xmax_.text = str(math.ceil(Xobjcentr1 + half_w))
        Ymax_.text = str(math.ceil(Yobjcentr1 + half_h))

    ## TODO: remove out-of-bounds objects (not implemented yet; required before
    ## expanded_photo can safely be set to False)

    ## save child file
    tree.write(path+'/rotated/CW'+str(angle)+'__'+str(filename))

# Mirror rotated JPGs on XX axis

In [0]:
import os
import cv2

## Write an upside-down mirrored copy of every rotated .JPG in `path`, prefixed with 'zXX_'.
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names/tiled output/rotated'
jpg_names = [name for name in os.listdir(path) if name.endswith('.JPG')]

for filename in jpg_names:
    source_file = path + '/' + filename
    mirrored_file = path + '/zXX_' + str(filename)

    img = cv2.imread(source_file)
    img_flip_ud = cv2.flip(img, 0)   ## flip code 0 mirrors about the horizontal axis
    cv2.imwrite(mirrored_file, img_flip_ud)

# Mirror rotated Pascal VOC XML on XX axis

In [0]:
import os
import xml.etree.ElementTree as ET

## Write an upside-down mirrored copy of every rotated Pascal VOC .xml in `path`,
## prefixed with 'zXX_'. The image references are renamed and each box is reflected in y.
path = '/content/drive/My Drive/Colab Notebooks/ObjectRecognition/Mask_RCNN/skin data/copy fullsize original names/tiled output/rotated'
for filename in os.listdir(path):
    if not filename.endswith('.xml'): continue
    fullname = os.path.join(path, filename)
    tree = ET.parse(fullname)
    root = tree.getroot()

    ## point the annotation at the mirrored image
    filename_ = root.find('filename')
    newfn = 'zXX_'+str(filename_.text)
    filename_.text = str(newfn)
    print(str(newfn))

    ## swap only the file-name part of <path>: keep everything up to the last
    ## separator (either '/' or '\') rather than assuming a fixed file-name length
    path_ = root.find('path')
    if path_ is not None and path_.text:
        cut = max(path_.text.rfind('/'), path_.text.rfind('\\')) + 1
        newpath = path_.text[:cut] + str(newfn)
        path_.text = str(newpath)
        # print(str(newpath))

    ## look the height up by tag name so the order of elements in the file does not matter
    imageheight = int(root.find('size').find('height').text)
    print('height:',imageheight)

    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        Ymin_ = bndbox.find('ymin')
        Ymax_ = bndbox.find('ymax')
        ## reflecting swaps the roles of the two edges: the old ymax becomes the new ymin
        Ymin_1 = imageheight-int(Ymax_.text)
        Ymax_1 = imageheight-int(Ymin_.text)
        print(Ymin_.text, Ymax_.text,' >>> ', Ymin_1, Ymax_1)
        Ymin_.text = str(Ymin_1)
        Ymax_.text = str(Ymax_1)

    tree.write(path+'/zXX_'+str(filename))