# Fish No Fish Detector
The following will look at a 256x256 image and learn if it contains fish parts.

## Import Data
Since each fish in the training data is located in a separate folder, there will be a label for each folder.

In [1]:
import os #for os.listdir
FISH_DIR='C:/Users/Nick/Dropbox/Work/Data Science/02 - Fish Detector/train_data/fish'
NFISH_DIR='C:/Users/Nick/Dropbox/Work/Data Science/02 - Fish Detector/train_data/no fish'

IMG_SIZE_X=128
IMG_SIZE_Y=128

Define a function that does the import process. It takes a folder as argument.

In [2]:
from tqdm import tqdm #library for the progress bar during for loop
import cv2 #required to read images
import matplotlib.pyplot as plt #for displaying images
import numpy as np

def create_training_data(DIR, label,max_num):
    X=[] #Training images
    Y=[] #Training labels
    #print(os.listdir(DIR)[1])
    for img in tqdm(os.listdir(DIR)[0:max_num]):
        #Read Image
        path=os.path.join(DIR, img)
        im=cv2.imread(path)
        im=cv2.normalize(im.astype('float32'), None,0.0, 1.0 ,cv2.NORM_MINMAX) #normalize/convert image to float
        im=cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        #Resize the Image
        im=cv2.resize(im, (IMG_SIZE_X,IMG_SIZE_Y), interpolation=cv2.INTER_AREA) #Set image size to 
        
        #Append image and label to training data
        X.append(np.array(im))
        Y.append(np.array(label))
        #plt.imshow(im)
        #plt.show()
        
    return(X,Y)
        
        
        

Run the function to collect training data for the fish class

In [3]:
(X_fish, Y_fish)=create_training_data(FISH_DIR,[1,0], 3927)
X_fish=np.asarray(X_fish)
Y_fish=np.asarray(Y_fish)

100%|██████████████████████████████████████████████████████████████████████████████| 3927/3927 [00:57<00:00, 68.13it/s]


Run the function to collect training data for the "no fish" class

In [4]:
(X_nfish, Y_nfish)=create_training_data(NFISH_DIR,[0,1], 3927)
X_nfish=np.asarray(X_nfish)
Y_nfish=np.asarray(Y_nfish)

100%|██████████████████████████████████████████████████████████████████████████████| 3927/3927 [01:06<00:00, 59.40it/s]


Now append the two classes and shuffle them around and create test and training data:

In [5]:
X=np.append(X_fish, X_nfish, axis=0)
Y=np.append(Y_fish, Y_nfish, axis=0)

#Clear some resources
del X_fish
del X_nfish
del Y_fish
del Y_nfish

In [6]:
#Shuffle the them around
from tflearn.data_utils import shuffle

seed = 7
np.random.seed(seed)
X, Y = shuffle(X, Y)

X_train=X[0:6200]
Y_train=Y[0:6200]

X_valid=X[6200:7000]
Y_valid=Y[6200:7000]

X_test=X[7000:7855]
Y_test=Y[7000:7855]


del X
del Y


curses is not supported on this machine (please install/reinstall curses for an optimal experience)


# Create the Neural Network
## Pre-define the network architecture

In [7]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint #For saving parts


model = Sequential()

model.add(Convolution2D(32, 3, 3, border_mode='same',
                        input_shape=X_train.shape[1:]))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 3, 3, border_mode='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('sigmoid'))

# Let's train the model using RMSprop
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

#checkpoints
import numpy
filepath='weights.best.hdf5'
checkpoint=ModelCheckpoint(filepath, 
                           monitor='val_acc', 
                           verbose=1,
                          save_best_only=True,
                          mode='max')
callbacks_list=[checkpoint]


from keras.preprocessing.image import ImageDataGenerator
#Create data augmentation
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.3,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.3,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False) # randomly flip images

Using TensorFlow backend.


Now train the data (witouth data addition).

In [None]:
import h5py #(for saving)
batch_size=64
nb_epoch=30

datagen.fit(X_train)

model.optimizer.lr.assign(0.005)
model.fit_generator(datagen.flow(X_train, Y_train,
                                 batch_size=batch_size),
                    samples_per_epoch=X_train.shape[0],
                    nb_epoch=nb_epoch,
                    validation_data=(X_test, Y_test),
                    callbacks=callbacks_list,
                    verbose=0)

#model.fit(X_train, Y_train,
#           batch_size=batch_size,
#           nb_epoch=nb_epoch,
#           validation_data=(X_test, Y_test),
#           shuffle=True,
#          callbacks=callbacks_list,
#         verbose=0)

Epoch 00000: val_acc improved from 0.81733 to 0.81792, saving model to weights.best.hdf5
Epoch 00001: val_acc did not improve
Epoch 00002: val_acc did not improve
Epoch 00003: val_acc did not improve
Epoch 00004: val_acc did not improve
Epoch 00005: val_acc improved from 0.81792 to 0.81909, saving model to weights.best.hdf5
Epoch 00006: val_acc did not improve
Epoch 00007: val_acc did not improve
Epoch 00008: val_acc improved from 0.81909 to 0.82670, saving model to weights.best.hdf5
Epoch 00009: val_acc did not improve
Epoch 00010: val_acc did not improve
Epoch 00011: val_acc did not improve
Epoch 00012: val_acc did not improve
Epoch 00013: val_acc did not improve
Epoch 00014: val_acc did not improve
Epoch 00015: val_acc did not improve
Epoch 00016: val_acc improved from 0.82670 to 0.82845, saving model to weights.best.hdf5
Epoch 00017: val_acc improved from 0.82845 to 0.84543, saving model to weights.best.hdf5
Epoch 00018: val_acc did not improve
Epoch 00019: val_acc did not improve


Save weights