In [27]:
'''
load images into python
determine whether images are greyscale
convert to greyscale if RGB
file i/o to load images, matrix manipulation, standardization
'''

import PIL
import os
from skimage.io import imread_collection

dir_name = 'data/NeedleImages/'
imgs = []
greyscale = []
img_size = []

# create a collection with the available images
col = imread_collection(os.path.join(dir_name, '*.jpg'))
# select one image for analysis
im = col[140]
# determine image type and shape
print(type(im))
print(im.shape)

# create list of .jpg files in img directory
for root, dirs, files in os.walk(dir_name):
    for file in files:
        if file.endswith('.jpg'):
            imgs.append(file)

# create function to test whether images are greyscale
def is_grey_scale(img_path):
    im = PIL.Image.open(img_path).convert('RGB')
    w,h = im.size
    for i in range(w):
        for j in range(h):
            r,g,b = im.getpixel((i,j))
            if r != g != b:
                return False
    return True

# test set of images for greyscale 
for i in imgs:
    img = os.path.join(dir_name, i)
    greyscale.append(is_grey_scale(img))
    

In [28]:
'''
create separator to move images to folders based on labels
'''

import csv
import os

SOURCE_ROOT = 'data_base/NeedleImages'
DEST_ROOT = 'data_base/'

with open('data/labels.csv') as infile:
    next(infile)  # Skip the header row
    reader = csv.reader(infile)
    seen = set()
    for Order, External_ID, Label in reader:

        src = os.path.join(SOURCE_ROOT, External_ID)
        dest = os.path.join(DEST_ROOT, Label, External_ID)

        try:
            os.rename(src, dest)
        except WindowsError as e:
            print (e)

ValueError: too many values to unpack (expected 2)

In [None]:
'''
setup training, validation, testing splits
'''

yes_dir

# create list of .jpg files in yes_img directory
for root, dirs, files in os.walk(dir_name):
    for file in files:
        if file.endswith('.jpg'):
            imgs.append(file)

yes_filenames = ['img_000.jpg', 'img_001.jpg', ...]
filenames.sort()  # make sure that the filenames have a fixed order before shuffling
random.seed(42)
random.shuffle(filenames) # shuffles the ordering of filenames (deterministic given the chosen seed)

split_1 = int(0.8 * len(filenames))
split_2 = int(0.9 * len(filenames))
train_filenames = filenames[:split_1]
val_filenames = filenames[split_1:split_2]
test_filenames = filenames[split_2:]

In [None]:
'''
establish expected image parameters, training, validation locations
'''

# expected image size
img_width, img_height = 512, 512

# folder containing the images on which the network will train. The train folder 
# has two sub folders, 'yes' and 'no' needle-containing images.
train_data_dir = 'data/train'

# folder containing the validation samples folder structure is same as the training folder
validation_data_dir = 'data/validation'

# how many images to be considered for training
train_samples = 2000

# how many images to be used for validation
validation_samples = 800

# how many runs will the network make over the training set before starting on validation
epoch = 50


In [None]:
'''
setup keras machine learning architecture
'''

from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

# ** Model Begins **
model = Sequential()
model.add(Convolution2D(32, 3, 3, input_shape=(3, img_width, img_height)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
# ** Model Ends **
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])


In [None]:
'''
develop image augmentation scripts to amplify sample size
'''

from keras.preprocessing.image import ImageDataGenerator

# this is the augmentation configuration we will use for training
# generating many transformed images so that the model can handle real-world variety
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# pass images to ImageGenerator to create transformed versions
train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode='binary')



In [None]:
'''
run model training
'''

# this is where the actual processing happens (time-consuming)
model.fit_generator(
        train_generator,
        samples_per_epoch=train_samples,
        nb_epoch=epoch,
        validation_data=validation_generator,
        nb_val_samples=validation_samples)

model.save_weights('trial.h5')