In [102]:
'''
import necessary packages and functions
'''

import os
import re
import io
import csv
import cv2
import random

from skimage import io, color
from skimage.color import rgb2gray
from skimage.io import imsave, imshow, imread
from skimage.transform import rescale, resize, downscale_local_mean


In [103]:
'''
import images from directory, convert to grayscale, append to list
'''
def import_images(dir_name, images, image_names):
    """Recursively load every image under ``dir_name`` as grayscale.

    Walks ``dir_name`` with ``os.walk`` and, for each file whose name ends in
    one of the extensions jpg, jpeg, png, bmp or tiff (matched
    case-insensitively), appends the bare file name to ``image_names`` and the
    grayscale pixel array to ``images``. Both lists are extended in place and
    in lock-step, so ``images[i]`` always belongs to ``image_names[i]``.

    Directories and files are visited in sorted order so the resulting lists
    do not depend on the order in which the file system enumerates entries.

    Args:
        dir_name: root directory to search.
        images: list that receives one grayscale array per image found.
        image_names: list that receives the matching file names (no directory).

    Returns:
        None; results are delivered through the two list arguments.
    """
    # Raw string avoids the invalid "\." escape; IGNORECASE also accepts
    # upper-case extensions such as ".JPG".
    extension_pattern = re.compile(r"\.(jpg|jpeg|png|bmp|tiff)$", re.IGNORECASE)

    for root, dirnames, filenames in os.walk(dir_name):
        # Sorting dirnames in place steers the order of os.walk's descent.
        dirnames.sort()
        for filename in sorted(filenames):
            if not extension_pattern.search(filename):
                continue

            filepath = os.path.join(root, filename)
            image = io.imread(fname=filepath)
            # A 2-D array is already single-channel; rgb2gray is only needed
            # for arrays that carry a trailing colour-channel axis.
            image_gray = image if image.ndim == 2 else rgb2gray(image)

            # Append only after a successful read so the two lists stay aligned.
            image_names.append(filename)
            images.append(image_gray)

# Load the full, still-unlabelled image set as grayscale arrays.
# `images` and `image_names` are filled in place by import_images and stay
# index-aligned with each other.
image_dir = '../../dataset/images/'
images, image_names = [], []

import_images(dir_name=image_dir, images=images, image_names=image_names)
            

In [104]:
'''
move images to 0/1 folders based on labels
'''

# Every image starts in SOURCE_ROOT and is moved to DEST_ROOT/<Label>/,
# where Label is the value found in the third column of the labels CSV.
SOURCE_ROOT = '../../dataset/images/'
DEST_ROOT = '../../dataset/'

# newline='' lets the csv module do its own newline handling.
with open('../../dataset/image-labels.csv', newline='') as infile:
    reader = csv.reader(infile)
    next(reader, None)  # Skip the header row (tolerates an empty file)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; nothing to move.
            continue
        Order, External_ID, Label = row
        src = os.path.join(SOURCE_ROOT, External_ID)
        dest = os.path.join(DEST_ROOT, Label, External_ID)
        try:
            # Create the label folder on first use so the rename can succeed.
            os.makedirs(os.path.dirname(dest), exist_ok=True)
            os.rename(src, dest)
        except OSError as e:
            # Best effort: report the failed move and carry on with the rest.
            # OSError is used because WindowsError only exists on Windows
            # (where it is an alias of OSError).
            print(e)
            

In [101]:
'''
setup training, testing, and validation split
'''
            
# Seed the global RNG once so the shuffles performed while splitting the
# dataset are repeatable from run to run.
random.seed(420)


def move_images(img_list, src_dir, dest_dir):
    """Move the named files from ``src_dir`` into ``dest_dir``.

    Args:
        img_list: iterable of file names (relative to ``src_dir``) to move.
        src_dir: directory that currently holds the files.
        dest_dir: directory that receives the files; created if missing.

    Raises:
        TypeError: if ``img_list`` is a single string, which would otherwise
            be iterated one character at a time.
        OSError: if a file cannot be renamed (e.g. it does not exist).
    """
    if isinstance(img_list, str):
        raise TypeError(
            'img_list must be a collection of file names, not a single string: '
            '{!r}'.format(img_list)
        )

    # os.rename does not create missing parent directories.
    os.makedirs(dest_dir, exist_ok=True)

    for img in img_list:
        src = os.path.join(src_dir, img)
        dest = os.path.join(dest_dir, img)
        os.rename(src, dest)
        
def split_dataset(imgs, src_dir, train_dir, test_dir, validation_dir):
    """Shuffle ``imgs`` and move them into train/validation/test folders.

    The list is first sorted and then shuffled with the global ``random``
    generator, so for a given RNG state the split depends only on the set of
    file names, not on the order in which they were collected. The first 80%
    of the shuffled names go to ``train_dir``, the next 10% to
    ``validation_dir`` and the remainder to ``test_dir``.

    Args:
        imgs: list of file names located in ``src_dir``; sorted and shuffled
            in place.
        src_dir: directory that currently holds the files.
        train_dir: destination for the training subset.
        test_dir: destination for the test subset.
        validation_dir: destination for the validation subset.
    """
    # Canonical starting order; the original `imgs.sort` lacked the call
    # parentheses and therefore did nothing.
    imgs.sort()
    random.shuffle(imgs)

    split_1 = int(0.8 * len(imgs))  # end of the training slice
    split_2 = int(0.9 * len(imgs))  # end of the validation slice
    train = imgs[:split_1]
    validation = imgs[split_1:split_2]
    test = imgs[split_2:]

    move_images(train, src_dir, train_dir)
    move_images(test, src_dir, test_dir)
    move_images(validation, src_dir, validation_dir)


In [100]:
'''
move images to appropriate folder for training, testing, and validation
'''
# --- label 1 ("yes") ---------------------------------------------------------
# Collect the file names under the label folder, then hand them to
# split_dataset, which moves them into the train/test/validation folders.
yes_dir = '../../dataset/1/'
yes_imgs, yes_names = [], []
import_images(dir_name=yes_dir, images=yes_imgs, image_names=yes_names)

yes_train, yes_test, yes_validation = (
    '../../data/train/1/',
    '../../data/test/1/',
    '../../data/validation/1/',
)
split_dataset(
    imgs=yes_names,
    src_dir=yes_dir,
    train_dir=yes_train,
    test_dir=yes_test,
    validation_dir=yes_validation,
)

# --- label 0 ("no") ----------------------------------------------------------
no_dir = '../../dataset/0/'
no_imgs, no_names = [], []
import_images(dir_name=no_dir, images=no_imgs, image_names=no_names)

no_train, no_test, no_validation = (
    '../../data/train/0/',
    '../../data/test/0/',
    '../../data/validation/0/',
)
split_dataset(
    imgs=no_names,
    src_dir=no_dir,
    train_dir=no_train,
    test_dir=no_test,
    validation_dir=no_validation,
)

    

FileNotFoundError: [Errno 2] No such file or directory: '../../dataset/1/11624.jpg' -> '../../data/test/0/11624.jpg'

In [None]:
# NOTE(review): this cell looks redundant and incorrect as written.
#  * yes_train / yes_test / yes_validation and their no_* counterparts are
#    assigned directory path strings in the previous cell, not lists of file
#    names. move_images loops over its first argument, so passing a string
#    makes it iterate over the individual characters of that path.
#  * split_dataset already calls move_images for the train, test and
#    validation subsets, so the files have been moved by the time this runs.
# TODO confirm whether this cell can simply be deleted.

# move yes training data
move_images(yes_train, yes_dir, '../../data/train/1/')
# move yes testing data
move_images(yes_test, yes_dir, '../../data/test/1/')
# move yes validation data
move_images(yes_validation, yes_dir, '../../data/validation/1/')


# move no training data
move_images(no_train, no_dir, '../../data/train/0/')
# move no testing data
move_images(no_test, no_dir, '../../data/test/0/')
# move no validation data
move_images(no_validation, no_dir, '../../data/validation/0/')



In [38]:
'''
amplify training dataset via image augmentation
'''

import random
from scipy import ndarray
import skimage as sk
from skimage import transform
from skimage import util

def random_rotation(image_array: ndarray):
    """Rotate ``image_array`` by a random angle.

    The angle is drawn uniformly from the range -25 to 25 using the global
    ``random`` generator and handed to ``skimage.transform.rotate``, whose
    result is returned.
    """
    angle = random.uniform(-25, 25)
    rotated = sk.transform.rotate(image_array, angle=angle)
    return rotated

def horizontal_flip(image_array: ndarray):
    """Return ``image_array`` mirrored left-to-right.

    Implemented purely with slicing: every row is kept and the second axis is
    traversed in reverse.
    """
    all_rows = slice(None)
    reversed_columns = slice(None, None, -1)
    return image_array[all_rows, reversed_columns]

def vertical_flip(image_array: ndarray):
    """Return ``image_array`` mirrored top-to-bottom.

    Implemented purely with slicing: the first axis is traversed in reverse
    and every column is kept.
    """
    reversed_rows = slice(None, None, -1)
    all_columns = slice(None)
    return image_array[reversed_rows, all_columns]


In [53]:
'''
apply rotation and both horizontal and vertical flips to all images in train folders
'''
# Source folder for the label-1 training images and the folder that will
# receive their augmented copies.
img_dir = '../../data/train/1'
aug_dir = '../../transformed/1/'

# Filled in place by import_images; imgs[i] corresponds to img_names[i].
imgs, img_names = [], []
import_images(dir_name=img_dir, images=imgs, image_names=img_names)



In [54]:
# Write four augmented variants of every training image into aug_dir:
# horizontal flip, vertical flip, both flips, and a random rotation.
# Each output file is named <prefix><original file name>.

# cv2.imwrite does not create missing directories.
os.makedirs(aug_dir, exist_ok=True)

for img, img_name in zip(imgs, img_names):
    vimg = vertical_flip(img)
    variants = (
        ('himg', horizontal_flip(img)),
        ('vimg', vimg),
        ('hvimg', horizontal_flip(vimg)),
        ('rimg', random_rotation(img)),
    )

    for prefix, augmented in variants:
        out_path = os.path.join(aug_dir, prefix + img_name)
        # The grayscale arrays produced by rgb2gray / rotate are floats in
        # [0, 1], while cv2.imwrite stores 8-bit data for these formats;
        # without rescaling to 0-255 the saved images come out almost black.
        # `util` is skimage.util, imported in the augmentation-helpers cell.
        pixels = util.img_as_ubyte(augmented)
        if not cv2.imwrite(out_path, pixels):
            # imwrite signals failure through its return value only.
            print('failed to write {}'.format(out_path))
    