In [4]:
import cv2
import random
import os
import math
import numpy as np
import shutil
import re

# For natural ordering
def atoi(text):
    """Convert an all-digit string to an int; return any other string unchanged."""
    if text.isdigit():
        return int(text)
    return text

# For natural ordering
def natural_keys(text):
    """Build a sort key that orders embedded numbers numerically ("natural" order).

    The text is split into alternating non-digit and digit runs, and every
    chunk is passed through atoi, so digit runs compare as ints: 'img2'
    sorts before 'img10'.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence,
    # which newer Python 3 interpreters warn about.
    return [atoi(chunk) for chunk in re.split(r'(\d+)', text)]

# Yield the names of all the non-hidden entries in a directory, in natural order
def listdir_nohidden(path):
    """Yield, in natural order, the names in `path` that do not start with '.'."""
    for entry in sorted(os.listdir(path), key=natural_keys):
        if entry.startswith('.'):
            continue
        yield entry
            
# Yields the file names (not full paths) of all the '.jpeg' images in that folder, in natural order
def images_in_folder(path):
    """Yield, in natural order, the non-hidden names in `path` that contain '.jpeg'.

    Only bare names are yielded, not full paths. The check is a substring
    match, so a name such as 'a.jpeg.bak' is yielded as well.
    """
    names = sorted(os.listdir(path), key=natural_keys)
    for name in names:
        if name.startswith('.') or '.jpeg' not in name:
            continue
        yield name
            
def count_folders(path):
    """Return how many entries in `path` have a name that does not start with '.'.

    Every non-hidden entry is counted, whether it is a folder or a file.
    """
    visible = 0
    for name in os.listdir(path):
        if not name.startswith('.'):
            visible += 1
    return visible

# Returns the training labels, the training images, the test labels and the test images (as numpy arrays), plus the list of class folder names
# The size of the test set is determined by test_size.
# test_size should be a number between 0-100. 0 means no test set, 100 means all test set, 50 means half and half.
def get_all_labels_and_images(path, test_size):
    """Load every labelled image under `path` and split it into train and test sets.

    Each non-hidden entry of `path` (in natural order) is treated as one class
    folder. Every readable '.jpeg' inside it becomes one sample whose label is
    a one-hot list with a 1 at that folder's index.

    Args:
        path: directory containing one sub-folder per class.
        test_size: percentage (0-100) of the shuffled samples to hold out;
            the test count is rounded up with math.ceil.

    Returns:
        (labels, images, labels_test, images_test, folders): four numpy arrays
        followed by the list of class folder names, whose order matches the
        one-hot positions.
    """
    images = []
    labels = []
    folders = list(listdir_nohidden(path))
    for idx, folder in enumerate(folders):
        print('Loading Images in folder ' + folder)
        folder_path = os.path.join(path, folder)
        for img in images_in_folder(folder_path):
            pixels = cv2.imread(os.path.join(folder_path, img), 1)
            if pixels is None:
                # cv2.imread signals an unreadable file by returning None.
                print('Bad')
                continue
            # NOTE(review): reshape reinterprets the buffer as (3, 100, 100)
            # without moving the channel axis, and fails for any image that
            # does not hold exactly 3*100*100 values. predict_well uses the
            # same reshape, so the two must only be changed together.
            images.append(pixels.reshape(3, 100, 100))
            encoding = [0] * len(folders)
            encoding[idx] = 1
            labels.append(encoding)

    # Shuffle labels and images together. zip() must be materialised first:
    # on Python 3 it is an iterator and random.shuffle needs a list.
    combined = list(zip(labels, images))
    random.shuffle(combined)
    if combined:
        # Unzipping an empty list cannot be unpacked into two names, so the
        # empty case keeps the (empty) lists built above.
        labels, images = zip(*combined)
    labels = np.array(list(labels))
    images = np.array(list(images))

    # The first `split` shuffled samples form the test set, the rest train.
    percent_test = test_size * 1.0 / 100
    split = int(math.ceil(len(labels) * percent_test))
    labels_test = labels[:split]
    labels = labels[split:]
    images_test = images[:split]
    images = images[split:]

    return labels, images, labels_test, images_test, folders

In [20]:
def CropImage(imageDiff, origImage, threshold_anchor, threshold_range, search_range, crop_size):
    """Crop `origImage` around the densest patch of strongly negative values in `imageDiff`.

    Every pixel whose value is below -threshold_anchor is an anchor candidate.
    For each candidate, the pixels below -threshold_range are counted in the
    window [i - h, i + h) x [j - h, j + h), with h = search_range // 2, clipped
    to the image. The first candidate (row-major order) with the highest count
    becomes the crop centre; with no candidates the centre stays at (0, 0).
    The centre is then pushed inwards so the crop stays inside the image.

    Args:
        imageDiff: 2-D numpy array of signed differences.
        origImage: array to crop; indexed with the same row/column positions.
        threshold_anchor: magnitude a pixel must exceed (negatively) to be a candidate.
        threshold_range: magnitude a neighbour must exceed (negatively) to be counted.
        search_range: full width of the counting window.
        crop_size: full width of the crop; the result spans
            2 * (crop_size // 2) rows and columns.

    Returns:
        The slice of `origImage` around the chosen centre.
    """
    height, width = imageDiff.shape
    # Floor division keeps the half-sizes integral on Python 3 too; they are
    # used as slice bounds below.
    half_search = search_range // 2
    half_crop = crop_size // 2

    best_count = 0
    centre_i = 0
    centre_j = 0
    # np.argwhere lists matches in row-major order, the same order as a
    # nested row/column scan, so ties resolve to the first candidate.
    for i, j in np.argwhere(imageDiff < -threshold_anchor):
        # Clip the window to the image. The lower bound is 0 inclusive, so
        # pixels in the first row and column are counted as well.
        window = imageDiff[max(i - half_search, 0):min(i + half_search, height),
                           max(j - half_search, 0):min(j + half_search, width)]
        count = int(np.count_nonzero(window < -threshold_range))
        if count > best_count:
            best_count = count
            centre_i, centre_j = int(i), int(j)

    # Keeps it from cropping off the side of the picture.
    if centre_i - half_crop < 0:
        centre_i = half_crop
    if centre_i + half_crop >= height:
        centre_i = height - half_crop - 1
    if centre_j - half_crop < 0:
        centre_j = half_crop
    if centre_j + half_crop >= width:
        centre_j = width - half_crop - 1

    return origImage[centre_i - half_crop:centre_i + half_crop,
                     centre_j - half_crop:centre_j + half_crop]

In [21]:
# Load the labelled crops and hold out 25 percent of them as the test set.
# `encoding` receives the list of class folder names.
print('Loading Images Please Be Patient')
labels, imgs, labels_test, imgs_test, encoding = get_all_labels_and_images(
    '/Users/patrickhayes/Desktop/labeled_cropped', 25)
print('Done Loading Images')

Loading Images Please Be Patient
Loading Images in folder 0_Eyes
Loading Images in folder 1_Eye
Loading Images in folder 2_Eyes
Loading Images in folder >2_Eyes
Loading Images in folder Dead
Loading Images in folder No_Head
Loading Images in folder No_Worm
Done Loading Images


In [22]:
# Number of held-out test samples.
print(len(labels_test))

576


In [23]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Convolution2D, MaxPooling2D, Activation, BatchNormalization
from keras.optimizers import SGD
from keras.layers import Flatten
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
import numpy as np
import cv2 as cv2
import h5py
# Module-level SGD optimizer; get_basic_model and get_adv_model both pass this
# same object to model.compile().
sgd = SGD(lr=0.001, momentum=0.9, decay=0.001)


def get_basic_model(num_classes):
    """Build and compile a one-convolution baseline classifier.

    Layers: 5 filters of 3x3 ('same' border, ReLU) -> batch normalisation on
    axis 1 -> 2x2 max pooling -> flatten -> softmax over `num_classes`.
    Compiled with categorical cross-entropy and the module-level `sgd`.

    Args:
        num_classes: number of output categories.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()

    # Channels-first input, matching get_adv_model, the axis=1 batch norm and
    # the (N, 3, 100, 100) arrays built by get_all_labels_and_images. The
    # earlier (100, 100, 3) shape disagreed with all three.
    model.add(Convolution2D(5, 3, 3, border_mode='same', input_shape=(3, 100, 100), activation='relu'))
    model.add(BatchNormalization(epsilon=1e-05, mode=0, axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

def get_adv_model(num_classes):
    """Build, summarise and compile the four-stage convolutional classifier.

    Each stage is: 25 filters of 3x3 ('same' border, ReLU) -> batch
    normalisation on axis 1 -> 2x2 max pooling. The stages are followed by a
    flatten and a softmax layer over `num_classes`. The model summary is
    printed before compiling with categorical cross-entropy and the
    module-level `sgd`.

    Args:
        num_classes: number of output categories.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()

    for stage in range(4):
        conv_options = dict(activation='relu', border_mode='same')
        if stage == 0:
            # Only the first layer of the stack declares the input shape.
            conv_options['input_shape'] = (3, 100, 100)
        model.add(Convolution2D(25, 3, 3, **conv_options))
        model.add(BatchNormalization(epsilon=1e-05, mode=0, axis=1))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # Flatten out the convolutional output so a fully connected layer can follow.
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

Using Theano backend.


In [24]:
# Build the CNN with one output per non-hidden entry of the labelled-image folder.
print('Compiling CNN')
model = get_adv_model(
    count_folders('/Users/patrickhayes/Desktop/labeled_cropped'))
print('Done Compiling CNN')

Compiling CNN
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_1 (Convolution2D)  (None, 25, 100, 100)  700         convolution2d_input_1[0][0]      
____________________________________________________________________________________________________
batchnormalization_1 (BatchNorma (None, 25, 100, 100)  100         convolution2d_1[0][0]            
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 25, 50, 50)    0           batchnormalization_1[0][0]       
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 25, 50, 50)    5650        maxpooling2d_1[0][0]             
_____________________________________________________________________________

In [25]:
# Show the training array shapes, then train for 5 epochs while validating
# on the held-out split.
print(imgs.shape)
print(labels.shape)
history = model.fit(
    imgs, labels,
    batch_size=10, nb_epoch=5, verbose=1, callbacks=[],
    validation_data=(imgs_test, labels_test),
    shuffle=True, class_weight='auto', sample_weight=None
)

(1727, 3, 100, 100)
(1727, 7)
Train on 1727 samples, validate on 576 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [31]:
# Per-class scores for every held-out test image.
pred = model.predict(imgs_test)

In [33]:
# NOTE(review): calc_balanced_accuracy is defined in a cell further down the
# notebook, so that cell must be run before this one on a fresh kernel.
tp, fp, tn, fn = calc_balanced_accuracy(labels_test, pred, encoding)

['0_Eyes', '1_Eye', '2_Eyes', '>2_Eyes', 'Dead', 'No_Head', 'No_Worm']
predicted 1
actual label 0
predicted 5
actual label 5
predicted 2
actual label 2
predicted 0
actual label 5
predicted 1
actual label 1
True Positives 193
True Negative 368
Fales Positives 56
False Negatives 46
The Balance Error Rate is 0.162272045472
A perfect predictor would be at 0 and the worse predictor would be at 1


In [32]:
def calc_balanced_accuracy(labels_test, labels_predicted, encoding):
    """Score predictions as a binary abnormal / not-abnormal problem.

    The classes named '0_Eyes', '1_Eye' and '>2_Eyes' are the positive
    ("abnormal") group; every other class is negative. Each sample's class is
    the index of the largest value in its row. The confusion counts and the
    balanced error rate (1 minus the mean of the two recalls) are printed,
    along with the predicted and actual class of the first five samples.

    Args:
        labels_test: sequence of one-hot (or score) rows with the true classes.
        labels_predicted: sequence of score rows, aligned with `labels_test`.
        encoding: list of class names; position i names class index i.

    Returns:
        (tp, fp, tn, fn) as ints.
    """
    print(encoding)
    abnormal = set(idx for idx, label in enumerate(encoding)
                   if label in ('0_Eyes', '1_Eye', '>2_Eyes'))

    tp = 0
    tn = 0
    fp = 0
    fn = 0
    for idx in range(len(labels_test)):
        # np.argmax returns the first index of the largest value.
        pred = int(np.argmax(labels_predicted[idx]))
        true = int(np.argmax(labels_test[idx]))
        if idx < 5:
            print("predicted " + str(pred))
            print("actual label " + str(true))

        pred_abnormal = pred in abnormal
        true_abnormal = true in abnormal
        if pred_abnormal and true_abnormal:
            tp += 1
        elif pred_abnormal:
            fp += 1
        elif true_abnormal:
            fn += 1
        else:
            tn += 1

    print('True Positives ' + str(tp))
    print('True Negative ' + str(tn))
    print('False Positives ' + str(fp))
    print('False Negatives ' + str(fn))

    # A recall is undefined when its class has no true samples; average only
    # the defined ones instead of dividing by zero.
    recalls = []
    if tp + fn:
        recalls.append(tp * 1.0 / (tp + fn))
    if tn + fp:
        recalls.append(tn * 1.0 / (tn + fp))
    if recalls:
        bal_err_rate = 1 - sum(recalls) / len(recalls)
    else:
        bal_err_rate = float('nan')
    print("The Balance Error Rate is " + str(bal_err_rate))
    print("A perfect predictor would be at 0 and the worse predictor would be at 1")
    return tp, fp, tn, fn
        
        

In [None]:

            
# NOTE(review): unexecuted scratch draft of the loading loop that lives in
# get_all_labels_and_images; prefer calling that function and deleting this
# cell. `path` is not defined at module level anywhere in the visible
# notebook and must be set before this cell can run.
images = []
labels = []
folders = list(listdir_nohidden(path))
# The loop sits at top level: indenting it under the assignment above is an
# IndentationError.
for idx, folder in enumerate(folders):
    print('Loading Images in folder ' + folder)
    folder_path = path + '/' + folder
    all_images = images_in_folder(folder_path)
    for img in all_images:
        # Named one_hot rather than encoding so the class-name list held in
        # the global `encoding` is not overwritten.
        one_hot = [0] * len(folders)
        # images_in_folder yields bare file names, so the folder is prepended.
        pixels = cv2.imread(folder_path + '/' + img, 1)
        if pixels is None:
            print('Bad')
            continue
        images.append(pixels.reshape(3, 100, 100))
        one_hot[idx] = 1
        labels.append(one_hot)


In [55]:
# Takes in folder with all the images for a well (uncropped)
# It first crops all the images, then it predicts the number of eyes
# for each image. It prints these results to file. At the top of this
# file is the prediction for the entire well
def predict_well(folder_path, encoding, model,
                 threshold_anchor, threshold_range,
                 search_range, crop_size):
    """Crop and classify every consecutive frame pair of one well.

    The '.jpeg' frames in `folder_path` are taken in natural order. For each
    frame after the first, the green-channel difference to the previous frame
    is passed to CropImage, the crop is reshaped to (3, 100, 100), and all
    crops are classified with `model`. The per-class scores are summed over
    all crops; the class with the largest sum is the well-level prediction.

    "results.txt" in the working directory receives a header row and the
    well-level row. The summed scores and the per-crop scores are printed.

    NOTE(review): the header lists per-category columns, but no per-image
    rows are written.

    Args:
        folder_path: folder holding the uncropped frames of one well.
        encoding: list of class names; position i names class index i.
        model: object whose predict() returns one score row per crop.
        threshold_anchor, threshold_range, search_range, crop_size:
            forwarded to CropImage. The fixed reshape below needs a crop of
            exactly 3*100*100 values.

    Raises:
        ValueError: if the folder holds fewer than two frames.
        IOError: if a frame cannot be read.
    """
    print("Encoding = " + str(encoding))

    image_names = list(images_in_folder(folder_path))
    if len(image_names) < 2:
        raise ValueError('Need at least two .jpeg frames in ' + folder_path)

    def read_frame(name):
        # cv2.imread returns None instead of raising when a file is unreadable.
        frame = cv2.imread(os.path.join(folder_path, name), 1)
        if frame is None:
            raise IOError('Could not read image ' + os.path.join(folder_path, name))
        return frame

    images = []
    # Each frame is read once and carried over as the next pair's "previous".
    previous = read_frame(image_names[0])
    for i in range(1, len(image_names)):
        print(i)
        current = read_frame(image_names[i])
        # Widen to int16 before subtracting so negative differences are kept.
        imageDiff = current[:, :, 1].astype(np.int16) - previous[:, :, 1].astype(np.int16)
        image_crop = CropImage(imageDiff, current, threshold_anchor,
                               threshold_range, search_range, crop_size)
        images.append(image_crop.reshape(3, 100, 100))
        previous = current
    images = np.array(images)
    pred = model.predict(images)

    # Column-wise total of the per-crop scores.
    well_scores = np.sum(pred, axis=0)

    # The with-block closes (and flushes) the file even if a write fails.
    with open("results.txt", 'w') as results:
        results.write("Name\tPredicted Category")
        for cat in encoding:
            results.write("\t" + cat)
        results.write('\n')
        results.write("Well\t" + encoding[int(np.argmax(well_scores))])
    print(well_scores)
    print('')
    print(pred)

In [56]:
# Classify well 7 of the 0_Eyes plate with the trained model.
predict_well('/Users/patrickhayes/Desktop/SARA_EYE_ANALYSIS_PLATES/0_Eyes/7', encoding, model,
             threshold_anchor=10, threshold_range=10, search_range=10, crop_size=100)


Encoding = ['0_Eyes', '1_Eye', '2_Eyes', '>2_Eyes', 'Dead', 'No_Head', 'No_Worm']
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
[  2.77091950e-01   5.50453424e-01   7.84282970e+00   2.72688852e-03
   9.95394657e-04   2.19699574e+00   1.31289072e+01]

[[  2.72185151e-02   4.68593203e-02   5.30107796e-01   2.18906906e-04
    8.48722193e-05   3.77158195e-01   1.83523912e-02]
 [  2.04297565e-02   4.65824008e-02   6.39210284e-01   1.40745848e-04
    4.38353854e-05   2.86407083e-01   7.18592200e-03]
 [  8.82654451e-03   1.20670110e-01   6.08890176e-01   5.07068871e-05
    2.79263550e-05   2.50953227e-01   1.05812931e-02]
 [  5.53537644e-02   3.45210768e-02   8.69107962e-01   9.35277203e-05
    1.28039042e-04   1.58804357e-02   2.49151718e-02]
 [  5.90441748e-02   3.47524807e-02   8.69085073e-01   1.09181849e-04
    1.89101062e-04   1.60244741e-02   2.07954906e-02]
 [  1.31034618e-02   3.99403274e-02   7.48389602e-01   2.57485575e-04
    7.74661094e-05   1.75761580e-01   2.24

In [57]:
def maxPred(labels):
    """Return the index of the largest value in `labels`.

    Ties go to the earliest index; an empty sequence gives None.
    """
    best_index = None
    best_value = None
    for position, candidate in enumerate(labels):
        if best_index is not None and not (candidate > best_value):
            continue
        best_index, best_value = position, candidate
    return best_index

In [1]:
# Scratch: a plain nested list used to try out summing.
ma = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
print(ma)

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]


In [2]:
# Scratch: built-in sum() starts from the int 0, so adding the first inner
# list raises the TypeError shown in the output.
sum(ma)

TypeError: unsupported operand type(s) for +: 'int' and 'list'

In [5]:
# Scratch: rebind ma to a numpy array. Built-in sum() then adds the rows
# element-wise, giving one total per column.
ma = np.array(ma)
sum(ma)

array([12, 15, 18])

In [7]:
# Scratch: first row of the array.
ma[0]

array([1, 2, 3])

In [9]:
# Scratch: total of the first column (element 0 of the row-wise sum).
sum(ma)[0]

12