In [1]:
'''
import necessary packages and functions
'''

import os
import re
import io
import csv
import cv2
import random

from scipy import ndarray

from skimage import io, color, transform, util
from skimage.color import rgb2gray
from skimage.io import imsave, imshow, imread
from skimage.transform import rescale, resize, downscale_local_mean


In [2]:
'''
define import_images function to pull from directory, convert to grayscale, append to list
'''
def import_images(dir_name, images, image_names):
    for root, dirnames, filenames in os.walk(dir_name):
        for filename in filenames:
            if re.search("\.(jpg|jpeg|png|bmp|tiff)$", filename):
                filepath = os.path.join(root, filename)
                image_names.append(filename)
                
                ## read images and add to list
                image = io.imread(fname=filepath)
                image_gray = rgb2gray(image)
                images.append(image_gray)
                
'''
define move_images function to move files between directories
'''            
def move_images(img_list, src_dir, dest_dir):
    for img in img_list:
        src = os.path.join(src_dir, img)
        dest = os.path.join(dest_dir, img)
        os.rename(src, dest)

'''
define split_dataset function for training, testing, and validation split
'''        
def split_dataset(imgs, src_dir, train_dir, test_dir, validation_dir, rseed = 420):
    imgs.sort
    random.seed(rseed)
    random.shuffle(imgs)
    split_1 = int(0.8 * len(imgs))
    split_2 = int(0.9 * len(imgs))
    train = imgs[:split_1]
    validation = imgs[split_1:split_2]
    test = imgs[split_2:]

    move_images(train, src_dir, train_dir)
    move_images(test, src_dir, test_dir)
    move_images(validation, src_dir, validation_dir)

'''
define image augmentation functions to amplify dataset
'''
def random_rotation(image_array: ndarray):
    # pick a random degree of rotation between 25% on the left and 25% on the right
    random_degree = random.uniform(-25, 25)
    return sk.transform.rotate(image_array, random_degree)

def horizontal_flip(image_array: ndarray):
    # horizontal flip doesn't need skimage, it's easy as flipping the image array of pixels !
    return image_array[:, ::-1]

def vertical_flip(image_array: ndarray):
    # vertical flip doesn't need skimage, it's easy as flipping the image array of pixels !
    return image_array[::-1, :]

def augment_images(imgs, img_names):
    for i in range(len(imgs)):
        img_name = img_names[i]

        himg = horizontal_flip(imgs[i])
        hfilename='{}{}{}'.format(aug_dir, 'himg', img_name)
        cv2.imwrite(hfilename, himg)

        vimg = vertical_flip(imgs[i])
        vfilename='{}{}{}'.format(aug_dir, 'vimg', img_name)
        cv2.imwrite(vfilename, vimg)

        hvimg = horizontal_flip(vimg)
        hvfilename='{}{}{}'.format(aug_dir, 'hvimg', img_name)
        cv2.imwrite(hvfilename, hvimg)

In [3]:
'''
move images to 0/1 folders based on labels
'''

SOURCE_ROOT = '../../dataset/images/'
DEST_ROOT = '../../dataset/'

with open('../../dataset/image-labels.csv') as infile:
    next(infile)  # Skip the header row
    reader = csv.reader(infile)
    seen = set()
    for Order, External_ID, Label in reader:
        src = os.path.join(SOURCE_ROOT, External_ID)
        dest = os.path.join(DEST_ROOT, Label, External_ID)
        try:
            os.rename(src, dest)
        except WindowsError as e:
            print (e)
            

NameError: name 'WindowsError' is not defined

In [105]:
'''
move images to appropriate folder for training, testing, and validation
'''
yes_dir = '../../dataset/1/'
yes_imgs = []
yes_names = []
import_images(yes_dir, yes_imgs, yes_names)

yes_train = '../../data/train/1/'
yes_test = '../../data/test/1/'
yes_validation = '../../data/validation/1/'
split_dataset(yes_names, yes_dir, yes_train, yes_test, yes_validation)

no_dir = '../../dataset/0/'
no_imgs = []
no_names = []
import_images(no_dir, no_imgs, no_names)

no_train = '../../data/train/0/'
no_test = '../../data/test/0/'
no_validation = '../../data/validation/0/'
split_dataset(no_names, no_dir, no_train, no_test, no_validation)

    

In [114]:
'''
amplify training dataset via image augmentation
apply rotation and both horizontal and vertical flips to all images in train folders
'''
img_dir = '../../data/train/1'
aug_dir = '../../data_aug/1/'
imgs = []
img_names = []

import_images(img_dir, imgs, img_names)
augment_images(imgs, img_names)

img_dir = '../../data/train/0'
aug_dir = '../../data_aug/0/'
imgs = []
img_names = []

import_images(img_dir, imgs, img_names)
augment_images(imgs, img_names)