In [143]:
import cv2
import random
import os
import math
import numpy as np

# List all the non-hidden directories
def listdir_nohidden(path):
    for f in os.listdir(path):
        if not f.startswith('.'):
            yield f

# Returns a list of the paths of all the images in that folder
def images_in_folder(path):
    return [path + '/' + name for name in os.listdir(path) if '.jpeg' in name]

def count_folders(path):
    return len([name for name in os.listdir(path) if not name.startswith('.')])

# Returns a list of trainning labels, a list of trainning images, a list of test labels and a list of test images
# The size of the test set is determined by test_size.
# test_size should be a number between 0-100. 0 means no test set, 100 means all test set, 50 means half and half.
def get_all_labels_and_images(path, test_size):
    images = []
    labels = []
    folders = list(listdir_nohidden(path))
    for idx, folder in enumerate(folders):
        print 'Loading Images in folder ' + folder
        all_images = images_in_folder(path + '/'+ folder)
        for img in all_images:
            encoding = [0] * len(folders)
            pixels = cv2.imread(img,1)
            if pixels is None:
                print 'Bad'
                continue
            images.append(pixels.reshape(3,100,100))
            encoding[idx] = 1
            labels.append(encoding)
   
    # Shuffle the images
    combined = zip(labels, images)
    random.shuffle(combined)
    labels , images = zip(*combined)
    labels = np.array(list(labels))
    images = np.array(list(images))
   
    percent_test = test_size * 1.0 / 100
    split = int(math.ceil(len(labels) * percent_test))
    labels_test = labels[:split]
    labels = labels[split:]
    images_test = images[:split]
    images = images[split:]
   
    return labels, images, labels_test, images_test, folders

In [144]:
print 'Loading Images Please Be Patient'
labels, imgs, labels_test, imgs_test, encoding = get_all_labels_and_images('/Users/patrickhayes/Desktop/cropped', 25)
print 'Done Loading Images'

Loading Images Please Be Patient
Loading Images in folder 0_Eyes
Loading Images in folder 1_Eye
Loading Images in folder 2_Eyes
Loading Images in folder >2_Eyes
Loading Images in folder Dead
Loading Images in folder No_Head
Loading Images in folder No_Worm
Done Loading Images


In [141]:
print len(labels_test)

663


In [154]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Convolution2D, MaxPooling2D, Activation, BatchNormalization
from keras.optimizers import SGD
from keras.layers import Flatten
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
import numpy as np
import cv2 as cv2
import h5py
sgd = SGD(lr=0.005, momentum=0.9, decay=0.001)


def get_basic_model(num_classes):
    model = Sequential()

    model.add(Convolution2D(5, 3, 3, border_mode='same', input_shape=(100, 100, 3), activation='relu'))
    model.add(BatchNormalization(epsilon=1e-05, mode=0, axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

def get_adv_model(num_classes):
    model = Sequential()
 
    model.add(Convolution2D(124, 3, 3, border_mode='same', input_shape=(3,100, 100),
                                                             activation='relu'))
    model.add(BatchNormalization(epsilon=1e-05, mode=0, axis=1))
    model.add(MaxPooling2D(pool_size=(2,2)))
 
    ## Second layer is another convolutional layer
    model.add(Convolution2D(124, 3, 3, activation='relu', border_mode='same'))
    model.add(BatchNormalization(epsilon=1e-05, mode=0, axis=1))
    model.add(MaxPooling2D(pool_size=(2,2)))
    
 
    ## Third layer is another convolutional layer
    model.add(Convolution2D(124, 3, 3, activation='relu', border_mode='same'))
    model.add(BatchNormalization(epsilon=1e-05, mode=0, axis=1))
    model.add(MaxPooling2D(pool_size=(2,2)))
 
    ## Flatten out the convolutional layer so we can have a fully connected layer
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

In [155]:
print 'Compiling CNN'
model = get_adv_model(count_folders('/Users/patrickhayes/Desktop/cropped'))
print 'Done Compiling CNN'

Compiling CNN
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_40 (Convolution2D) (None, 124, 100, 100) 3472        convolution2d_input_20[0][0]     
____________________________________________________________________________________________________
batchnormalization_39 (BatchNorm (None, 124, 100, 100) 496         convolution2d_40[0][0]           
____________________________________________________________________________________________________
maxpooling2d_39 (MaxPooling2D)   (None, 124, 50, 50)   0           batchnormalization_39[0][0]      
____________________________________________________________________________________________________
convolution2d_41 (Convolution2D) (None, 124, 50, 50)   138508      maxpooling2d_39[0][0]            
_____________________________________________________________________________

In [156]:
print imgs.shape
print labels.shape
history = model.fit(imgs, labels, batch_size=10, nb_epoch=5,verbose=1, callbacks=[],
                    validation_data=(imgs_test, labels_test),
                    shuffle=True, class_weight='auto', sample_weight=None)

(1988, 3, 100, 100)
(1988, 7)
Train on 1988 samples, validate on 663 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [145]:
pred = model.predict(imgs_test)

In [153]:
tp, fp, tn, fn = calc_balanced_accuracy(labels_test, pred, encoding)

2
0
6
1
2
0
6
6
6
6
True Positives 0
True Negative 417
Fales Positives 0
False Negatives 246
The Balance Error Rate is 0.5
A perfect predictor would be at 0 and the worse predictor would be at 1


In [152]:
def calc_balanced_accuracy(labels_test, labels_predicted, encoding):
    normal = list()
    abnormal = list()
    bad_angle = list()
    for idx, label in enumerate(encoding):
        if label == '2_Eyes':
            normal.append(idx)
        elif label == '0_Eyes' or label == '1_Eye' or label == '>2_Eyes':
            abnormal.append(idx)
        elif label == 'No_Head' or label == 'No_Worm' or label == 'Dead':
            bad_angle.append(idx)
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    for idx in range(0,len(labels_test)):
        pred = maxPred(labels_predicted[idx])
        true = maxPred(labels_test[idx])
        if idx < 5:
            print pred
            print true
        
        if pred in abnormal and true in abnormal:
            tp += 1
        elif pred in abnormal and true not in abnormal:
            fp += 1
        elif pred not in abnormal and true in abnormal:
            fn += 1
        elif pred not in abnormal and true not in abnormal:
            tn += 1
    print 'True Positives ' + str(tp)
    print 'True Negative ' + str(tn)
    print 'Fales Positives ' + str(fp)
    print 'False Negatives ' + str(fn)
    bal_err_rate = 1 - 0.5 * (tp * 1.0 /(tp + fn) + tn * 1.0 /(tn+fp))
    print "The Balance Error Rate is " + str(bal_err_rate)
    print "A perfect predictor would be at 0 and the worse predictor would be at 1"
    return tp, fp, tn, fn,  
        
        

In [123]:
def maxPred(labels):
    maxIdx = None
    maxVal = None
    for idx, val in enumerate(labels):
        if maxIdx is None or val > maxVal:
            maxIdx = idx
            maxVal = val
    return maxIdx