# Importing necessary libraries

In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.utils import to_categorical
from keras import backend as K
from sklearn.model_selection import train_test_split, StratifiedKFold
from keras.models import load_model
import numpy as np
import pandas as pd
import glob
import cv2
import csv
import os import walk

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Define two functions

getFish: takes in a filepath and reads image data using OpenCV. Resizes image to 150x150 in RGB. Returns the image pixel data, the classifications (eyespot, no eyespot), and the species name.  

load_kfold_data: takes in k number of folds and a list of image filespaths. Calls getFish to receive image data and appends image data and classification data to respective lists. Label data is converted to binary in order to run in function StratifiedKFold. Convert labels back to categorical. Returns folds, training data, and labels.

In [32]:
def getFish(filepath):
    img = cv2.imread(filepath)
    # standardize size for prediction step
    img = cv2.resize(img, (150, 150))
    img = img/255.0
    img = img.reshape((1,) + img.shape)
    
    if "noeyespot" in filepath:
        classification = "noeyespot"
    else:
        classification = "eyespot"  
    
    _, _, species = filepath.partition('eyespot/')
    
    #res = model.predict(img)
    return(img, classification, species)

def load_data_kfold(k, image_filepaths):
    
    #use getFish function 
    #load in one file, append to a list
    
    x_train = []
    y_train = []
    
    for i in range(len(image_filepaths)):
        x, y, _ = getFish(image_filepaths[i])
        x_train.append(x)
        y_train.append(y)
        if (y_train[i] == 'eyespot'):
            y_train[i] = 0
        else:
            y_train[i] = 1
    
    #StratifiedKFold can only work in binary; convert labels to categorical afterwards
    folds = list(StratifiedKFold(n_splits=k, shuffle=True, random_state=1).split(x_train, y_train))
    
    y_train = to_categorical(y_train)
    
    return folds, x_train, y_train

# Running load_data_kfold

Load list of image filepaths into "fish_paths" by changing corresponding Dropbox paths  
Use kfold function to get 10 folds, the training data, and the trianing labels.

In [33]:
eye_path = "/Users/leannwoo/Dropbox/machine_learning_fishes/image_classifiers/Eyespots_all_species/eyespot/"
noeye_path = "/Users/leannwoo/Dropbox/machine_learning_fishes/image_classifiers/Eyespots_all_species/noeyespot/"

fish_paths = []

for (dirpath, dirnames, filenames) in walk(eye_path):
    for i in range(len(filenames)):
        eye_img = dirpath + filenames[i]
        if "Store" not in eye_img:  
            fish_paths.append(eye_img)
    break

for (dirpath, dirnames, filenames) in walk(noeye_path):
    for i in range(len(filenames)):
        noeye_img = dirpath + filenames[i]
        if "Store" not in noeye_img:
            fish_paths.append(noeye_img)
    break

k = 10
folds, x_train, y_train = load_data_kfold(k, fish_paths)

# Defining CNN

In [34]:
# dimensions of our images.
img_width, img_height = 150, 150

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('sigmoid'))

model.compile(loss='mean_squared_error',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Creating distorted images generator to create more training images

In [35]:
gen = ImageDataGenerator(horizontal_flip = True,
                         vertical_flip = True,
                         width_shift_range = 0.1,
                         height_shift_range = 0.1,
                         zoom_range = 0.1,
                         rotation_range = 10
                        )

# Running training set using 10-fold cross validation

Now using the training data to train the model that was previously defined with a batch size of 20 and 30 epochs per fold

In [36]:
batch_size = 20

for j, (train_idx, val_idx) in enumerate(folds):
    
    X_train_cv = []
    y_train_cv = []
    X_valid_cv = []
    y_valid_cv = []
    
    for i in range(len(train_idx)):
        X_train_cv.append(x_train[train_idx[i]])
        y_train_cv.append(y_train[train_idx[i]])
    for i in range(len(val_idx)):
        X_valid_cv.append(x_train[val_idx[i]])
        y_valid_cv.append(y_train[val_idx[i]])
    
    X_train_cv = np.array(X_train_cv)
    X_train_cv = np.squeeze(X_train_cv, axis = 1)
    
    X_valid_cv = np.array(X_valid_cv)
    X_valid_cv = np.squeeze(X_valid_cv, axis = 1)
    
    y_train_cv = np.array(y_train_cv)
    y_valid_cv = np.array(y_valid_cv)
    
    print '\nFold ',j
    name_weights = "final_model_fold" + str(j) + "_weights.h5"
    generator = gen.flow(X_train_cv, y_train_cv, batch_size = batch_size)
    model.fit_generator(
                generator,
                steps_per_epoch=len(X_train_cv)/batch_size,
                epochs=30,
                shuffle=True,
                verbose=1,
                validation_data = (X_valid_cv, y_valid_cv))
    model.save_weights('first_try.h5')
    model.save('second_try.h5')
    print(model.evaluate(X_valid_cv, y_valid_cv))


Fold  0
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.26110559701919556, 0.5]

Fold  1
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.42225825786590576, 0.375]

Fold  2
Epoch 1/30
Epoch 2/30


Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.27849262952804565, 0.625]

Fold  3
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.22075998783111572, 0.5]

Fold  4
Epoch 1/30
Epoch 2/30
Epoch 3/30


Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.1872883141040802, 0.75]

Fold  5
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.1777370274066925, 0.75]

Fold  6
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30


Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.38079512119293213, 0.625]

Fold  7
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.0869731530547142, 0.875]

Fold  8
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.17743432521820068, 0.75]

Fold  9
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[0.011758597567677498, 1.0]


# Predicting

Define prediction function, takes in a model and a list of returned data from getFish function. Returns the model's prediction, the correct classification, the prediction weights, and the fish family/species.

In [37]:
def predict(model, get_fish_output):
    
    img, correct_class, fam = get_fish_output
    
    prediction_weights = model.predict(img)
    prediction = model.predict_classes(img)
    if (prediction[0] == 0):
        prediction = "eyespot"
    else:
        prediction = "noeyespot"

    #print "Model classification:",prediction,"\nCorrect classification:", correct_class,"\nPrediction wieghts:",prediction_weights,"\nType:",fam,"\n"
    return(prediction, correct_class, prediction_weights, fam)

# Writing the model's classification data to CSV

Loads model in "mod"  
Writes the model classification, correct classification, prediction weights, and species/family to a csv files  
Calculates data for confusion matrix

In [39]:
mod = load_model("second_try.h5")

with open('classification_data.csv', 'wb') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    filewriter.writerow(['Model Classification', 'Correct Classification', 'Prediction Weights', 'Type'])

    true_eye=0
    true_noeye=0
    false_eye=0
    false_noeye=0

    for i in range(len(fish_paths)):
        info = predict(mod, getFish(fish_paths[i]))
        filewriter.writerow([info[0], info[1], info[2], info[3]])

        #computing info for confusion matrix
        if (info[0] == info[1]):
            if (info[0] == 'eyespot'):
                true_eye += 1
            else:
                true_noeye += 1
        else:
            if (info[0] == 'eyespot'):
                false_eye += 1
            else:
                false_noeye += 1

# Prints confusion matrix

In [40]:
data = {'Eyespots':[true_eye, false_noeye], 'No Eyespots':[false_eye, true_noeye] }
confusion_matrix = pd.DataFrame(data = data)
confusion_matrix = confusion_matrix.rename({0:'Eyespots (Model Output)', 1:'No Eyespots (Model Output)'},axis='index')
confusion_matrix

Unnamed: 0,Eyespots,No Eyespots
Eyespots (Model Output),40,4
No Eyespots (Model Output),0,36
