In [57]:
'''
load images into python
determine shape of image arrays
build function to determine whether images are greyscale
'''

import os
import matplotlib.pyplot as plt
from skimage import color
from skimage.io import imread_collection
from skimage.color import rgb2gray
from skimage.transform import rescale, resize, downscale_local_mean
from PIL import Image

dir_name = 'data_base/backup/NeedleImages/'
image_list = []
greyscale = []
image_size = []

# create a collection with the available images
col = imread_collection(os.path.join(dir_name, '*.jpg'))
# select one image for analysis
image = col[140]

# determine image type and shape
print(type(image))
print(image.shape)

# determine gray image type and shape
image_gray = rgb2gray(image)
print(type(image_gray))
print(image_gray.shape)

# list .jpg files in img directory
for root, dirs, files in os.walk(dir_name):
    for file in files:
        if file.endswith('.jpg'):
            image_list.append(file)
            

image = color.rgb2gray(image)
image_rescaled = rescale(image, 1.0 / 2.0, anti_aliasing=False)

print(image_rescaled.shape)
print(image_gray)

img = Image.open(os.path.join(dir_name, image_list[1])) 
array = np.array(img)
print(array)
print(array.shape)

<class 'numpy.ndarray'>
(512, 512)
<class 'numpy.ndarray'>
(512, 512)
(256, 256)
[[36 41 46 ... 49 48 43]
 [37 41 45 ... 46 46 42]
 [39 41 43 ... 42 44 42]
 ...
 [42 42 43 ... 42 42 39]
 [42 42 43 ... 44 43 38]
 [38 39 41 ... 48 47 42]]
[[ 42  39  37 ...  43  42  41]
 [ 43  40  37 ...  43  43  41]
 [ 40  39  37 ...  44  43  42]
 ...
 [ 39  39  36 ... 219 217 216]
 [ 37  39  37 ... 222 222 223]
 [ 35  39  39 ... 221 222 222]]
(512, 512)


In [None]:

# create function to test whether images are greyscale
def is_grey_scale(img_path):
    im = PIL.Image.open(img_path).convert('RGB')
    w,h = im.size
    for i in range(w):
        for j in range(h):
            r,g,b = im.getpixel((i,j))
            if r != g != b:
                return False
    return True

# test set of images for greyscale 
for i in image_list:
    image = os.path.join(dir_name, i)
    greyscale.append(is_grey_scale(image))
    
#print(image_list)
#print(greyscale)

In [58]:
'''
import, resize, and reshape images
'''

import os
import re
import cv2
from matplotlib import pyplot
import numpy as np

dir_name = 'data_base/NeedleImages/'
images = []

for root, dirnames, filenames in os.walk(dir_name):
    for filename in filenames:
        if re.search("\.(jpg|jpeg|png|bmp|tiff)$", filename):
            filepath = os.path.join(root, filename)
            image = cv2.imread(filepath)
            image_resized = cv2.resize(image,(256,256))
            images.append(image_resized)

images = np.array(images)

print(images[1].shape)


(256, 256, 3)


In [42]:
img = image[1]
print(img.shape)



(512, 3)


In [None]:
'''
create separator to move images to yes/no folders based on labels
'''

import csv
import os

SOURCE_ROOT = 'data_base/NeedleImages'
DEST_ROOT = 'data_base/'

with open('data/labels.csv') as infile:
    next(infile)  # Skip the header row
    reader = csv.reader(infile)
    seen = set()
    for Order, External_ID, Label in reader:
        src = os.path.join(SOURCE_ROOT, External_ID)
        dest = os.path.join(DEST_ROOT, Label, External_ID)
        try:
            os.rename(src, dest)
        except WindowsError as e:
            print (e)

In [None]:
'''
setup training, validation, testing splits
'''

import random

yes_dir = 'data_base/yes'
no_dir = 'data_base/no'
yes_imgs = []
no_imgs = []

# create list of .jpg files in yes_img directory
for root, dirs, files in os.walk(yes_dir):
    for file in files:
        if file.endswith('.jpg'):
            yes_imgs.append(file)

yes_imgs.sort()  # make sure that the filenames have a fixed order before shuffling
random.seed(42)
random.shuffle(yes_imgs) # shuffles the ordering of filenames (deterministic given the chosen seed)

split_1 = int(0.8 * len(yes_imgs))
split_2 = int(0.9 * len(yes_imgs))
train_filenames = yes_imgs[:split_1]
val_filenames = yes_imgs[split_1:split_2]
test_filenames = yes_imgs[split_2:]


In [None]:
# create list of .jpg files in no_img directory
for root, dirs, files in os.walk(no_dir):
    for file in files:
        if file.endswith('.jpg'):
            no_imgs.append(file)

no_imgs.sort()  # make sure that the filenames have a fixed order before shuffling
random.seed(42)
random.shuffle(no_imgs) # shuffles the ordering of filenames (deterministic given the chosen seed)

split_1 = int(0.8 * len(no_imgs))
split_2 = int(0.9 * len(no_imgs))
no_train_filenames = no_imgs[:split_1]
no_val_filenames = no_imgs[split_1:split_2]
no_test_filenames = no_imgs[split_2:]

In [None]:
'''
create function to move images to appropriate folder for training, validation, and testing
'''

def move_images(img_list, src_dir, dest_dir):
    for img in img_list:
        src = os.path.join(src_dir, img)
        dest = os.path.join(dest_dir, img)
        os.rename(src, dest)

# move yes training data
move_images(train_filenames, 'data_base/yes/', 'data/train/yes/')
# move no training data
move_images(no_train_filenames, 'data_base/no/', 'data/train/no/')

# move testing data
move_images(test_filenames, 'data_base/yes/', 'data/test/yes/')
# move no testing data
move_images(no_test_filenames, 'data_base/no/', 'data/test/no/')

# move validation data
move_images(val_filenames, 'data_base/yes/', 'data/validation/yes/')
# move no validation data
move_images(no_val_filenames, 'data_base/no/', 'data/validation/no/')

