In [97]:
'''
load images into python
determine whether images are greyscale
convert to greyscale if RGB
file i/o to load images, matrix manipulation, standardization
'''

import PIL
import os
from skimage.io import imread_collection
from skimage.color import rgb2gray

dir_name = 'data_base/backup/NeedleImages/'
imgs = []
greyscale = []
img_size = []

# create a collection with the available .jpg images in dir_name
col = imread_collection(os.path.join(dir_name, '*.jpg'))
# select one image for analysis
im = col[140]

# determine image type and shape
print(type(im))
print(im.shape)

# Convert to greyscale only if the image actually has a colour axis.
# rgb2gray expects the colour channels on the last axis; a 2-D array is
# already single-channel, so it is passed through unchanged instead of being
# handed to rgb2gray (which rejects 2-D input in current scikit-image).
img_gray = rgb2gray(im) if im.ndim == 3 else im
# determine gray image type and shape
print(type(img_gray))
print(img_gray.shape)



<class 'numpy.ndarray'>
(512, 512)
<class 'numpy.ndarray'>
(512, 512)


In [None]:
# list .jpg files in img directory
# The list is rebuilt from scratch so that re-running this cell cannot append
# duplicate entries. Each entry is stored relative to dir_name, so a file that
# os.walk finds in a sub-directory can still be located with
# os.path.join(dir_name, entry); for files directly inside dir_name the entry
# is just the file name, as before.
imgs = [
    os.path.relpath(os.path.join(root, file), dir_name)
    for root, dirs, files in os.walk(dir_name)
    for file in files
    if file.endswith('.jpg')
]

# create function to test whether images are greyscale
def is_grey_scale(img_path):
    """Return True when every pixel of the image has equal R, G and B values.

    Args:
        img_path: path of the image file to inspect.

    Returns:
        bool: False as soon as one pixel with differing channel values is
        found, True if no such pixel exists.
    """
    # Import the submodule explicitly: a bare ``import PIL`` does not by
    # itself guarantee that ``PIL.Image`` has been loaded.
    from PIL import Image

    # The context manager closes the underlying file once the RGB copy exists.
    with Image.open(img_path) as source:
        im = source.convert('RGB')

    w, h = im.size
    for i in range(w):
        for j in range(h):
            r, g, b = im.getpixel((i, j))
            # ``r != g != b`` would be a chained comparison
            # (``r != g and g != b``) and would miss pixels such as (7, 7, 9);
            # a pixel is grey only when all three channels agree.
            if not (r == g == b):
                return False
    return True

# test set of images for greyscale
# The result list is rebuilt on every run so that it always lines up
# one-to-one with imgs, even when this cell is executed more than once.
greyscale = [is_grey_scale(os.path.join(dir_name, i)) for i in imgs]

In [29]:
'''
create separator to move images to folders based on labels
'''

import csv
import os

SOURCE_ROOT = 'data_base/NeedleImages'
DEST_ROOT = 'data_base/'

# newline='' hands line-ending handling to the csv module, as the csv
# documentation recommends for files passed to csv.reader.
with open('data/labels.csv', newline='') as infile:
    reader = csv.reader(infile)
    next(reader, None)  # Skip the header row (an empty file is tolerated)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; nothing to move
            continue
        Order, External_ID, Label = row
        src = os.path.join(SOURCE_ROOT, External_ID)
        dest_dir = os.path.join(DEST_ROOT, Label)
        dest = os.path.join(dest_dir, External_ID)
        try:
            # make sure the per-label folder exists before moving into it
            os.makedirs(dest_dir, exist_ok=True)
            os.rename(src, dest)
        except OSError as e:
            # OSError is defined on every platform (WindowsError only exists
            # on Windows, where it is an alias of OSError). A file that cannot
            # be moved is reported and the remaining rows are still processed.
            print(e)

In [64]:
'''
setup training, validation, testing splits
'''

import random

yes_dir = 'data_base/yes'
no_dir = 'data_base/no'
no_imgs = []

# collect the name of every .jpg found anywhere under yes_dir, sorted so that
# the order is fixed before shuffling
yes_imgs = sorted(
    name
    for _root, _dirs, names in os.walk(yes_dir)
    for name in names
    if name.endswith('.jpg')
)

# deterministic shuffle: the seed is fixed immediately before shuffling
random.seed(42)
random.shuffle(yes_imgs)

# 80% training / 10% validation / 10% testing, cut by position in the
# shuffled list
n_yes = len(yes_imgs)
split_1 = int(0.8 * n_yes)
split_2 = int(0.9 * n_yes)
train_filenames, val_filenames, test_filenames = (
    yes_imgs[:split_1],
    yes_imgs[split_1:split_2],
    yes_imgs[split_2:],
)


In [65]:
# create list of .jpg files in no_img directory
# The list is rebuilt here (rather than only appended to) so that re-running
# this cell cannot duplicate file names and put the same image in more than
# one split.
no_imgs = []
for root, dirs, files in os.walk(no_dir):
    for file in files:
        if file.endswith('.jpg'):
            no_imgs.append(file)

no_imgs.sort()  # make sure that the filenames have a fixed order before shuffling
random.seed(42)
random.shuffle(no_imgs) # shuffles the ordering of filenames (deterministic given the chosen seed)

# 80% training / 10% validation / 10% testing, cut by position in the shuffled list
split_1 = int(0.8 * len(no_imgs))
split_2 = int(0.9 * len(no_imgs))
no_train_filenames = no_imgs[:split_1]
no_val_filenames = no_imgs[split_1:split_2]
no_test_filenames = no_imgs[split_2:]

In [66]:
'''
create function to move images to appropriate folder for training, validation, and testing
'''

def move_images(img_list, src_dir, dest_dir):
    """Move the named image files from ``src_dir`` into ``dest_dir``.

    Args:
        img_list: iterable of file names, each relative to ``src_dir``.
        src_dir: directory that currently holds the files.
        dest_dir: directory to move the files into; it is created (including
            missing parent directories) when it does not exist yet.

    Raises:
        OSError: if a file cannot be moved, e.g. the source file is missing.
    """
    # os.rename does not create the target folder, so make sure it is there.
    os.makedirs(dest_dir, exist_ok=True)
    for img in img_list:
        src = os.path.join(src_dir, img)
        dest = os.path.join(dest_dir, img)
        os.rename(src, dest)

# every move as (file names, source folder, destination folder), listed in the
# order in which the moves are carried out: training, testing, validation,
# each time 'yes' images first and 'no' images second
_moves = (
    (train_filenames, 'data_base/yes/', 'data/train/yes/'),
    (no_train_filenames, 'data_base/no/', 'data/train/no/'),
    (test_filenames, 'data_base/yes/', 'data/test/yes/'),
    (no_test_filenames, 'data_base/no/', 'data/test/no/'),
    (val_filenames, 'data_base/yes/', 'data/validation/yes/'),
    (no_val_filenames, 'data_base/no/', 'data/validation/no/'),
)

for _names, _src_dir, _dest_dir in _moves:
    move_images(_names, _src_dir, _dest_dir)



In [91]:
'''
establish expected image parameters, training, validation locations
'''

# expected image size (pixels)
img_width = 512
img_height = 512

# root folder of the training images; it holds one sub folder per class
# ('yes' and 'no' needle-containing images)
train_data_dir = 'data/train'

# root folder of the validation images, laid out like the training folder
validation_data_dir = 'data/validation'

# sample count used for training (handed to fit_generator as samples_per_epoch)
train_samples = 1800

# sample count used for validation (handed to fit_generator as validation_steps)
validation_samples = 200

# number of epochs, i.e. passes over the training data (handed to fit_generator as epochs)
epoch = 15


In [92]:
'''
setup keras machine learning architecture
'''

from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

# ** Model Begins **
model = Sequential()

# three convolution -> relu -> 2x2 max-pooling stages with 32, 32 and 64
# filters; only the very first convolution declares the input shape
for stage, n_filters in enumerate((32, 32, 64)):
    if stage == 0:
        conv = Convolution2D(n_filters, (3, 3), input_shape=(img_width, img_height, 3))
    else:
        conv = Convolution2D(n_filters, (3, 3))
    model.add(conv)
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# classifier head: flatten, 64-unit dense layer with relu and dropout, then a
# single sigmoid output unit
head_layers = (
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
)
for head_layer in head_layers:
    model.add(head_layer)
# ** Model Ends **

model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)


In [93]:
'''
develop image augmentation scripts to amplify sample size
'''

from keras.preprocessing.image import ImageDataGenerator

# augmentation configuration for training: rescaling plus random shear, zoom
# and horizontal flips, so that the model sees many transformed images
train_augmentation = dict(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**train_augmentation)

# configuration for testing: only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# settings shared by both directory readers
flow_settings = dict(
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='binary',
)

# pass images to ImageGenerator to create transformed versions
train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_settings)
validation_generator = test_datagen.flow_from_directory(validation_data_dir, **flow_settings)



Found 504 images belonging to 2 classes.
Found 62 images belonging to 2 classes.


In [94]:
'''
run model training
'''

# this is where the actual processing happens (time-consuming)
# Keras 2 counts *batches* here, not samples: steps_per_epoch and
# validation_steps are the number of batches drawn from each generator per
# epoch. The legacy samples_per_epoch argument is only renamed to
# steps_per_epoch, so passing the sample counts directly would run
# batch_size times more data than configured. The sample counts are therefore
# converted to batch counts (at least one batch each).
model.fit_generator(
        train_generator,
        steps_per_epoch=max(1, train_samples // train_generator.batch_size),
        epochs=epoch,
        validation_data=validation_generator,
        validation_steps=max(1, validation_samples // validation_generator.batch_size))

# keep the trained weights on disk
model.save_weights('trial.h5')

  # This is added back by InteractiveShellApp.init_path()


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [86]:
import os
import h5py
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense

# weight files: the VGG16 convolutional weights and the weights of the
# classifier that is put on top of them
weights_path = 'vgg16_weights.h5'
top_model_weights_path = 'fc_model.h5'

# dimensions of our images
img_width = 256
img_height = 256

# image folders
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'

# sample counts and number of epochs used for fine-tuning
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50


trial.h5


In [None]:
# build the VGG16 network
# Each entry is (number of filters, number of convolutions) for one block.
# Every convolution is preceded by a one-pixel zero padding and every block
# ends with a 2x2 max-pooling layer; convolutions are named conv<block>_<index>.
vgg_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

model = Sequential()
for block_no, (n_filters, n_convs) in enumerate(vgg_blocks, start=1):
    for conv_no in range(1, n_convs + 1):
        if not model.layers:
            # the very first layer declares the input shape
            padding = ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height))
        else:
            padding = ZeroPadding2D((1, 1))
        model.add(padding)
        model.add(Convolution2D(n_filters, 3, 3, activation='relu',
                                name='conv{}_{}'.format(block_no, conv_no)))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

# load the weights of the VGG16 networks
# (trained on ImageNet, won the ILSVRC competition in 2014)
# note: when there is a complete match between your model definition
# and your weight savefile, you can simply call model.load_weights(filename)
assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
# The file is only read, so it is opened explicitly in read-only mode, and the
# context manager closes it even when a layer rejects its weights. The weights
# are handed to set_weights inside the block, while the file is still open.
with h5py.File(weights_path, 'r') as f:
    for k in range(f.attrs['nb_layers']):
        if k >= len(model.layers):
            # we don't look at the last (fully-connected) layers in the savefile
            break
        g = f['layer_{}'.format(k)]
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        model.layers[k].set_weights(weights)
print('Model loaded.')

# classifier that goes on top of the convolutional model: flatten, 256-unit
# relu layer with dropout, single sigmoid output
top_model = Sequential()
top_layers = (
    Flatten(input_shape=model.output_shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
)
for top_layer in top_layers:
    top_model.add(top_layer)

# fine-tuning has to start from a fully-trained classifier, top classifier
# included, so its weights are loaded before it is attached
top_model.load_weights(top_model_weights_path)

# stack the classifier onto the convolutional base
model.add(top_model)

# freeze the first 25 layers: their weights will not be updated
for frozen_layer in model.layers[:25]:
    frozen_layer.trainable = False

# SGD with momentum and a very small learning rate
sgd = optimizers.SGD(lr=1e-4, momentum=0.9)
model.compile(
    loss='binary_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

# data augmentation for training: rescaling plus random shear, zoom and
# horizontal flips
train_augmentation = dict(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**train_augmentation)

# validation images are only rescaled
test_datagen = ImageDataGenerator(rescale=1./255)

# settings shared by both directory readers
flow_settings = dict(
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_settings)
validation_generator = test_datagen.flow_from_directory(validation_data_dir, **flow_settings)

# fine-tune the model
# Keras 2 counts *batches* here, not samples: steps_per_epoch and
# validation_steps are the number of batches drawn from each generator per
# epoch. The legacy samples_per_epoch argument is only renamed to
# steps_per_epoch, so passing the sample counts directly would run
# batch_size times more data than configured. The sample counts are therefore
# converted to batch counts (at least one batch each).
model.fit_generator(
        train_generator,
        steps_per_epoch=max(1, nb_train_samples // train_generator.batch_size),
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=max(1, nb_validation_samples // validation_generator.batch_size))
