In [None]:
'''
load images into python
determine whether images are greyscale
convert to greyscale if RGB
file i/o to load images, matrix manipulation, standardization
'''

import PIL
import os
from skimage.io import imread_collection
from skimage.color import rgb2gray

dir_name = 'data_base/backup/NeedleImages/'
imgs = []
greyscale = []
img_size = []

# position (within the collection) of the single image inspected below
SAMPLE_INDEX = 140

# create a collection with the available images
col = imread_collection(os.path.join(dir_name, '*.jpg'))
# select one image for analysis
im = col[SAMPLE_INDEX]

# determine image type and shape
print(type(im))
print(im.shape)

# Convert only when the image actually has a channel axis: a greyscale file is
# loaded as a 2-D array, and recent scikit-image releases reject 2-D input to
# rgb2gray. A 2-D image is passed through unchanged, so in that case img_gray
# keeps the dtype of the loaded array instead of the float output of rgb2gray.
if im.ndim == 3:
    img_gray = rgb2gray(im)
else:
    img_gray = im
# determine gray image type and shape
print(type(img_gray))
print(img_gray.shape)



In [None]:
# list .jpg files in img directory (os.walk also descends into subfolders)
# Each entry is stored relative to dir_name, so os.path.join(dir_name, entry)
# resolves correctly even for files found in a subfolder; for files directly
# inside dir_name the entry is just the file name, as before.
# The list is rebuilt rather than appended to, so re-running this cell does
# not duplicate entries.
imgs = [
    os.path.relpath(os.path.join(root, file), dir_name)
    for root, dirs, files in os.walk(dir_name)
    for file in files
    if file.endswith('.jpg')
]

# create function to test whether images are greyscale
def is_grey_scale(img_path):
    """Return True when every pixel of the image has equal R, G and B values.

    Parameters
    ----------
    img_path : str
        Path of the image file to inspect.

    Returns
    -------
    bool
        True if the image, converted to RGB, has identical red, green and
        blue channels (this includes an image with no pixels); False as soon
        as any pixel differs in at least one channel.
    """
    # Imported here because a plain `import PIL` does not by itself guarantee
    # that the PIL.Image submodule has been loaded.
    from PIL import Image

    # The context manager closes the underlying file once the pixel data has
    # been converted, which matters when many images are checked in a loop.
    with Image.open(img_path) as handle:
        rgb = handle.convert('RGB')

    # Compare whole channels at once. The former per-pixel test
    # `r != g != b` only meant `r != g and g != b`, so pixels such as
    # (1, 1, 2) or (3, 7, 7) were wrongly accepted as grey.
    red, green, blue = rgb.split()
    return red.tobytes() == green.tobytes() == blue.tobytes()

# test set of images for greyscale
# One flag per entry of imgs, in the same order. The list is rebuilt on every
# run instead of appended to, so re-executing this cell never duplicates results.
greyscale = [is_grey_scale(os.path.join(dir_name, name)) for name in imgs]

In [None]:
'''
move each image into the folder named after its label in data/labels.csv
'''

import csv
import os

SOURCE_ROOT = 'data_base/NeedleImages'
DEST_ROOT = 'data_base/'

# newline='' lets the csv module do its own line-ending handling, as its
# documentation recommends for files passed to csv.reader.
with open('data/labels.csv', newline='') as infile:
    reader = csv.reader(infile)
    # Skip the header row through the reader (not the raw file) so a quoted
    # header is consumed as one record; the default avoids an error on an
    # empty file.
    next(reader, None)
    for row in reader:
        if len(row) != 3:
            # Blank lines come back as empty lists and are ignored silently;
            # any other malformed row is reported and skipped instead of
            # aborting the whole run with an unpacking error.
            if row:
                print('skipping malformed row: {!r}'.format(row))
            continue
        Order, External_ID, Label = row
        src = os.path.join(SOURCE_ROOT, External_ID)
        dest = os.path.join(DEST_ROOT, Label, External_ID)
        try:
            # Make sure the label folder exists; os.rename does not create it.
            os.makedirs(os.path.dirname(dest), exist_ok=True)
            os.rename(src, dest)
        except OSError as e:
            # OSError exists on every platform; WindowsError is only defined
            # on Windows (where it is an alias of OSError), so naming it here
            # raised NameError elsewhere as soon as a move failed.
            print(e)

In [None]:
'''
setup training, validation, testing splits
'''

import random

yes_dir = 'data_base/yes'
no_dir = 'data_base/no'
no_imgs = []

# collect the name of every .jpg found under yes_dir; sorting gives the
# shuffle below a fixed starting order
yes_imgs = sorted(
    name
    for _, _, names in os.walk(yes_dir)
    for name in names
    if name.endswith('.jpg')
)

# seeded shuffle: the same permutation on every run for the same file list
random.seed(42)
random.shuffle(yes_imgs)

# cut points at 80 % and 90 % of the list: train / validation / test
n_yes = len(yes_imgs)
split_1 = int(0.8 * n_yes)
split_2 = int(0.9 * n_yes)
train_filenames, val_filenames, test_filenames = (
    yes_imgs[:split_1],
    yes_imgs[split_1:split_2],
    yes_imgs[split_2:],
)


In [None]:
# create list of .jpg files in no_img directory
# The list is built from scratch here: no_imgs is initialised in a different
# cell, so appending to it would double the entries (and put the same file
# name into several splits) whenever this cell is re-run on its own.
no_imgs = sorted(
    file
    for root, dirs, files in os.walk(no_dir)
    for file in files
    if file.endswith('.jpg')
)  # sorted: make sure that the filenames have a fixed order before shuffling
random.seed(42)
random.shuffle(no_imgs) # shuffles the ordering of filenames (deterministic given the chosen seed)

# cut points at 80 % and 90 % of the list: train / validation / test
split_1 = int(0.8 * len(no_imgs))
split_2 = int(0.9 * len(no_imgs))
no_train_filenames = no_imgs[:split_1]
no_val_filenames = no_imgs[split_1:split_2]
no_test_filenames = no_imgs[split_2:]

In [None]:
'''
create function to move images to appropriate folder for training, validation, and testing
'''

def move_images(img_list, src_dir, dest_dir):
    """Move the named files from src_dir into dest_dir.

    Parameters
    ----------
    img_list : iterable of str
        File names (relative to src_dir) to move.
    src_dir : str
        Directory the files currently live in.
    dest_dir : str
        Directory to move the files to; created if it does not exist yet.

    Raises
    ------
    OSError
        If a file cannot be moved (for example because it is missing from
        src_dir).
    """
    # os.rename does not create missing folders, so make sure the target
    # exists up front instead of failing on the first file.
    os.makedirs(dest_dir, exist_ok=True)
    for img in img_list:
        src = os.path.join(src_dir, img)
        dest = os.path.join(dest_dir, img)
        os.rename(src, dest)

# (filenames, source folder, destination folder) for every split and class,
# listed in the order the moves are carried out:
# training, then testing, then validation; "yes" before "no" each time
move_plan = (
    (train_filenames, 'data_base/yes/', 'data/train/yes/'),
    (no_train_filenames, 'data_base/no/', 'data/train/no/'),
    (test_filenames, 'data_base/yes/', 'data/test/yes/'),
    (no_test_filenames, 'data_base/no/', 'data/test/no/'),
    (val_filenames, 'data_base/yes/', 'data/validation/yes/'),
    (no_val_filenames, 'data_base/no/', 'data/validation/no/'),
)

for split_files, split_src, split_dest in move_plan:
    move_images(split_files, split_src, split_dest)

