# In[3]:
'''
move images to yes/no folders based on labels
'''

import csv
import os

SOURCE_ROOT = 'data_base/images_rescaled'
DEST_ROOT = 'data_base/'

# Move every image listed in labels.csv from SOURCE_ROOT into
# DEST_ROOT/<label>/, keeping its file name.
with open('data_base/labels.csv') as infile:
    reader = csv.reader(infile)
    # Skip the header as a CSV record (not a raw line); the default keeps an
    # empty file from raising StopIteration.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines such as a trailing newline;
            # unpacking those would raise ValueError.
            continue
        _order, external_id, label = row
        src = os.path.join(SOURCE_ROOT, external_id)
        dest = os.path.join(DEST_ROOT, label, external_id)
        try:
            os.rename(src, dest)
        except OSError as e:
            # OSError instead of WindowsError: the latter name only exists on
            # Windows, so a failed rename elsewhere would raise NameError.
            # Failures are reported and the loop moves on to the next image.
            print(e)
            

# In[4]:
'''
setup training and validation split
'''

import random

# Class directories that hold the labelled images.
yes_dir = 'data_base/yes'
no_dir = 'data_base/no'

# Fixed seed so both shuffles below give the same ordering on every run.
random.seed(42)

# Gather the bare file names of every .jpg found anywhere under each class
# directory (os.walk also descends into sub-directories).
yes_imgs = []
for root, dirs, files in os.walk(yes_dir):
    yes_imgs.extend(name for name in files if name.endswith('.jpg'))

no_imgs = []
for root, dirs, files in os.walk(no_dir):
    no_imgs.extend(name for name in files if name.endswith('.jpg'))

# Sort before shuffling so the starting order does not depend on the file
# system; "yes" is shuffled before "no" to keep the random stream unchanged.
yes_imgs.sort()
random.shuffle(yes_imgs)
no_imgs.sort()
random.shuffle(no_imgs)

# 80% training / 10% validation / 10% testing boundaries for the "yes" class.
yes_split_1, yes_split_2 = (int(frac * len(yes_imgs)) for frac in (0.8, 0.9))
yes_train, yes_validation, yes_test = (
    yes_imgs[:yes_split_1],
    yes_imgs[yes_split_1:yes_split_2],
    yes_imgs[yes_split_2:],
)

# Same boundaries for the "no" class.
no_split_1, no_split_2 = (int(frac * len(no_imgs)) for frac in (0.8, 0.9))
no_train, no_validation, no_test = (
    no_imgs[:no_split_1],
    no_imgs[no_split_1:no_split_2],
    no_imgs[no_split_2:],
)


# In[10]:
'''
move images to appropriate folder for training, validation, and testing
'''

def move_images(img_list, src_dir, dest_dir):
    """Move the named files from ``src_dir`` into ``dest_dir``.

    Args:
        img_list: File names (not paths) of the images to move.
        src_dir: Directory that currently contains the files.
        dest_dir: Directory to move the files into. It is created, with any
            missing parents, when there is at least one file to move.

    Raises:
        OSError: If a file cannot be renamed (e.g. it is missing from
            ``src_dir``) or the destination directory cannot be created.
    """
    # os.rename does not create missing directories, so make sure the target
    # exists first. Nothing is created when there is nothing to move.
    if img_list and dest_dir and not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    for img in img_list:
        src = os.path.join(src_dir, img)
        dest = os.path.join(dest_dir, img)
        os.rename(src, dest)

# Move each split of each class out of data_base/<class>/ into
# data/<split>/<class>/. Order: training, testing, validation, with the
# "yes" class before the "no" class in each split.
for split_imgs, class_src, split_dest in (
    (yes_train, 'data_base/yes/', 'data/train/yes/'),
    (no_train, 'data_base/no/', 'data/train/no/'),
    (yes_test, 'data_base/yes/', 'data/test/yes/'),
    (no_test, 'data_base/no/', 'data/test/no/'),
    (yes_validation, 'data_base/yes/', 'data/validation/yes/'),
    (no_validation, 'data_base/no/', 'data/validation/no/'),
):
    move_images(split_imgs, class_src, split_dest)

    