In [None]:
# import the necessary packages
import os
import uuid
from PIL import Image
from IPython.display import clear_output

# Path configuration: every location is derived from BASE_PATH.
BASE_PATH = './dataset'
_MIXED_CROPPED_PATH = os.path.join(BASE_PATH, 'mixed-cropped')

# Source images, and the folder that receives the train/test split.
BASE_DATASET_PATH = os.path.join(_MIXED_CROPPED_PATH, 'orimix-320-sm')
BASE_DATASET_EXT_PATH = os.path.join(_MIXED_CROPPED_PATH, 'extracted')

# Split sub-folders inside the extracted dataset.
TRAIN_DATASET_PATH = os.path.join(BASE_DATASET_EXT_PATH, 'train')
TEST_DATASET_PATH = os.path.join(BASE_DATASET_EXT_PATH, 'test')

# Despite the name, this is used as a directory that holds one .xml per image.
ANNOTATIONS_FILE = os.path.join(_MIXED_CROPPED_PATH, 'orimix-320-sm-annotations')

# Target size (in pixels) used when resizing images.
MAX_SIZE = 320

In [None]:
# Split the dataset into training and testing folders with a 0.9 / 0.1 ratio
# (the validation share is 0). The fixed seed keeps the split reproducible.
# Run this cell only if the data has not been split into the correct folders yet.

import splitfolders

splitfolders.ratio(
    BASE_DATASET_PATH,
    output=BASE_DATASET_EXT_PATH,
    seed=1092,
    ratio=(0.9, 0, 0.1),
    group_prefix=None,
)

In [None]:
# Minimal text progress bar for monitoring long-running loops.
# Credits: https://www.mikulskibartosz.name/how-to-display-a-progress-bar-in-jupyter-notebook/

def updateProgressBar(progress, bar_length = 50):
    """Redraw a one-line text progress bar in the current cell output.

    Args:
        progress: Completed fraction. An int is converted to float, any other
            non-float value is treated as 0, and the value is then clamped
            to the range [0, 1].
        bar_length: Number of characters drawn between the brackets.
    """
    # Normalise the input type first: int -> float, anything unknown -> 0.
    if isinstance(progress, int):
        fraction = float(progress)
    elif isinstance(progress, float):
        fraction = progress
    else:
        fraction = 0

    # Clamp to the displayable range.
    if fraction < 0:
        fraction = 0
    if fraction >= 1:
        fraction = 1

    filled = int(round(bar_length * fraction))

    # Replace the previous bar instead of appending a new line below it.
    clear_output(wait = True)
    bar = "#" * filled + "-" * (bar_length - filled)
    print("Progress: [{0}] {1:.1f}%".format(bar, fraction * 100))

In [None]:
# Resize every image so that its longer side equals MAX_SIZE while keeping the
# original aspect ratio. Expected input layout:
# PathToDir/
#     Label_A/
#         PathToImg_1.jpg
#         PathToImg_2.jpg
#         PathToImg_n.jpg
#         ....
#     Label_B/
#         PathToImg_1.jpg
#         PathToImg_2.jpg
#         PathToImg_n.jpg
#         ....
#     Label_n/
#         ....

# Generate input and output locations.
# NOTE(review): DATASET_FOLDER is not defined in any cell visible here. It must be
# set (ideally in the configuration cell) before this cell runs, otherwise the
# next line raises NameError on a fresh kernel.
IN_PATH = os.path.join(BASE_PATH, DATASET_FOLDER)
OUT_PATH = os.path.join(BASE_PATH, '{}-{}'.format(DATASET_FOLDER, str(MAX_SIZE)))
# Create the output folder if it does not exist yet
os.makedirs(OUT_PATH, exist_ok=True)

# Only sub-directories are labels; stray files in IN_PATH (e.g. OS metadata files)
# would otherwise make os.listdir() fail below.
labels = [
    entry for entry in os.listdir(IN_PATH)
    if os.path.isdir(os.path.join(IN_PATH, entry))
]

# List every label folder once, keeping only entries that have a file extension,
# and make sure the matching output label folder exists.
images_by_label = {}
for label in labels:
    os.makedirs(os.path.join(OUT_PATH, label), exist_ok=True)
    images_by_label[label] = [
        name for name in os.listdir(os.path.join(IN_PATH, label))
        if os.path.splitext(name)[1] != ''
    ]

# Count only the files that will actually be processed so the progress bar is accurate
num_files = sum(len(names) for names in images_by_label.values())
cnt = 0

# Enumerate every label in IN_PATH
for label, files in images_by_label.items():
    # Enumerate every file in the label folder
    for file in files:
        # Load the image; the context manager closes the underlying file handle,
        # while convert() returns an independent in-memory copy.
        with Image.open(os.path.join(IN_PATH, label, file)) as source:
            image = source.convert('RGB')
        # Scale so the longer side becomes exactly MAX_SIZE. Integer arithmetic avoids
        # float round-off truncating the long side to MAX_SIZE - 1, and max(1, ...)
        # keeps extremely thin images from collapsing to a zero-pixel side.
        w, h = image.size
        longest = max(w, h)
        tw = max(1, w * MAX_SIZE // longest)
        th = max(1, h * MAX_SIZE // longest)
        # Resize the image
        image = image.resize((tw, th))
        # Save under the original file name.
        # NOTE: the data is always written as JPEG, whatever the file extension is.
        image.save(os.path.join(OUT_PATH, label, file), 'JPEG')
        # Update progress bar
        cnt += 1
        updateProgressBar(cnt / num_files)

# Progress full / task complete
updateProgressBar(1)

In [None]:
# Flatten the train and test folders: move every image out of its label
# sub-folder into the dataset folder itself, then remove the empty label folders.
import shutil


def _flatten_label_folders(dataset_path):
    """Move all entries of each label sub-folder of dataset_path up one level.

    Entries of dataset_path that are not directories are skipped, so running
    this again on an already flattened folder does nothing.

    Args:
        dataset_path: Folder that contains one sub-folder per label.

    Raises:
        FileExistsError: If an entry with the same name already exists in
            dataset_path; moving would otherwise risk silently overwriting it.
    """
    for label in os.listdir(dataset_path):
        label_folder = os.path.join(dataset_path, label)
        if not os.path.isdir(label_folder):
            continue
        for image_name in os.listdir(label_folder):
            target_path = os.path.join(dataset_path, image_name)
            if os.path.exists(target_path):
                raise FileExistsError(
                    'cannot move {} from {}: {} already exists'.format(
                        image_name, label_folder, target_path))
            shutil.move(os.path.join(label_folder, image_name), target_path)
        # The label folder is empty now; os.rmdir refuses to delete it otherwise.
        os.rmdir(label_folder)


for dataset_path in (TRAIN_DATASET_PATH, TEST_DATASET_PATH):
    _flatten_label_folders(dataset_path)

In [None]:
# Copy the annotation (.xml) of every image into the training and testing folders.
# Images that have no annotation are deleted from the dataset folder.
def _attach_annotations(dataset_path, annotations_dir):
    """Copy '<name>.xml' from annotations_dir next to each file in dataset_path.

    A file whose base name has no matching .xml in annotations_dir is removed
    from dataset_path. Entries that are not regular files are left untouched.

    Args:
        dataset_path: Flat folder of image files.
        annotations_dir: Folder holding one .xml file per image base name.

    Returns:
        List of file names that were removed because no annotation was found.
    """
    removed = []
    # os.listdir returns a snapshot, so the .xml files copied below are not revisited.
    for entry in os.listdir(dataset_path):
        entry_path = os.path.join(dataset_path, entry)
        if not os.path.isfile(entry_path):
            continue
        fName, _ = os.path.splitext(entry)
        xml_name = '{}{}'.format(fName, '.xml')
        ann_path = os.path.join(annotations_dir, xml_name)
        if os.path.isfile(ann_path):
            shutil.copyfile(ann_path, os.path.join(dataset_path, xml_name))
        else:
            os.remove(entry_path)
            removed.append(entry)
    return removed


for dataset_path in (TRAIN_DATASET_PATH, TEST_DATASET_PATH):
    removed_images = _attach_annotations(dataset_path, ANNOTATIONS_FILE)
    # Deleting data silently is easy to miss, so report how much was dropped.
    print('{}: removed {} file(s) without annotation'.format(dataset_path, len(removed_images)))