In [1]:
# import the libraries 
from __future__ import print_function
import tensorflow as tf
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
import numpy as np
import pandas as pd

# get the gpu to do the train
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

# to supress warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] ='2'

Using TensorFlow backend.


In [2]:
# check for the gpu
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14314672282727860934
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3157314764
locality {
  bus_id: 1
  links {
  }
}
incarnation: 9737800159081283107
physical_device_desc: "device: 0, name: GeForce GTX 1050 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1"
]


In [3]:
# number of classes
num_classes = 8

# number of batch size and the epochs
batch_size = 32
epochs = 25

# input image dimnsions
img_rows, img_cols = 250, 140
target_size = (img_rows, img_cols)
color_mode = 'rgb'

# checking for shape includding the channels
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 3)
    
# setting the file to be read
train_path = './train'
valid_path = './valid'
test_path_1 = './test1'
test_path_2 = './test2'

In [4]:
# image Augmentation
datagen = ImageDataGenerator(rescale=1/255, shear_range=0.2, horizontal_flip=True, vertical_flip=True)

# loading the training data
training_data = datagen.flow_from_directory(train_path, target_size = target_size, batch_size = batch_size,
                                            class_mode = 'categorical', color_mode = color_mode, shuffle = True)

Found 3301 images belonging to 8 classes.


In [5]:
# image Augmentation
datagen = ImageDataGenerator(rescale=1/255)

# loading the validation data
validation_data = datagen.flow_from_directory(valid_path, target_size = target_size, batch_size = batch_size,
                                            class_mode = 'categorical', color_mode = color_mode, shuffle = True)

Found 476 images belonging to 8 classes.


In [6]:
# image Augmentation
datagen = ImageDataGenerator(rescale=1/255)

# loading the test data
test_data_1 = datagen.flow_from_directory(test_path_1, target_size = target_size, batch_size = 50,
                                            class_mode = None, color_mode = color_mode, shuffle = False)

Found 1000 images belonging to 1 classes.


In [7]:
# image Augmentation
datagen = ImageDataGenerator(rescale=1/255)

# loading the test data
test_data_2 = datagen.flow_from_directory(test_path_2, target_size = target_size, batch_size = 3,
                                            class_mode = None, color_mode = color_mode, shuffle = False)

Found 12153 images belonging to 1 classes.


In [8]:
# define the model
model = Sequential()
model.add(Conv2D(50, kernel_size=(3,3), padding='same', activation='relu', input_shape=input_shape))
model.add(Conv2D(50, kernel_size=(3,3), activation='relu'))
model.add(Conv2D(50, kernel_size=(3,3), strides = 2, activation='relu'))
model.add(Conv2D(100, kernel_size=(3,3), padding='same', activation='relu'))
model.add(Conv2D(100, kernel_size=(3,3), activation='relu'))
model.add(Conv2D(100, kernel_size=(3,3), strides = 2, activation='relu'))
model.add(Flatten())
model.add(Dense(200, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

In [9]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 250, 140, 50)      1400      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 248, 138, 50)      22550     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 123, 68, 50)       22550     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 123, 68, 100)      45100     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 121, 66, 100)      90100     
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 60, 32, 100)       90100     
_________________________________________________________________
flatten_1 (Flatten)          (None, 192000)            0         
__________

In [10]:
# compile the loss funcation and the optimizer funcation
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

In [11]:
# train the model
STEP_SIZE_TRAIN = training_data.n // training_data.batch_size
STEP_SIZE_VALID = validation_data.n // validation_data.batch_size

model.fit_generator(generator = training_data,
                    steps_per_epoch = STEP_SIZE_TRAIN,
                    validation_data = validation_data,
                    validation_steps = STEP_SIZE_VALID,
                    epochs = epochs
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x129b5b4d518>

In [13]:
model.save('my_3rd_model.h5')  # creates a HDF5 file 'my_model.h5'

In [14]:
test_data_1.reset()
predictions_1 = model.predict_generator(test_data_1, steps=20, verbose=0)

In [15]:
FishNames = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

test_image_list = test_data_1.filenames
f_submit = open('submit_1_c.csv', 'w')
f_submit.write('image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT\n')

for i, image_name in enumerate(test_image_list):
    pred = ['%.6f' % p for p in predictions_1[i, :]]
    f_submit.write('%s,%s\n' % (os.path.basename(image_name), ','.join(pred)))

f_submit.close()

In [16]:
df_1 = pd.read_csv('submit_1_c.csv')
df_1.head(2)

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,img_00005.jpg,6e-06,0.0,0.0,0.0,0.999994,0.0,0.0,0.0
1,img_00007.jpg,0.998462,8e-06,0.0,0.000138,0.000732,0.000139,0.0,0.000521


In [17]:
test_data_2.reset()
predictions_2 = model.predict_generator(test_data_2, steps=4051, verbose=0)

In [18]:
FishNames = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

test_image_list = test_data_2.filenames
f_submit = open('submit_2_c.csv', 'w')
f_submit.write('image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT\n')

for i, image_name in enumerate(test_image_list):
    pred = ['%.6f' % p for p in predictions_2[i, :]]
    f_submit.write('%s,%s\n' % (os.path.basename(image_name), ','.join(pred)))

f_submit.close()

In [19]:
df_2 = pd.read_csv('submit_2_b.csv')
df_2.head(2)

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,image_00001.jpg,0.264242,0.172134,0.011821,0.027255,0.051512,0.258965,0.07048,0.143592
1,image_00002.jpg,0.057599,0.001027,0.012868,0.000183,0.892273,0.002944,3e-06,0.033104


In [20]:
df_2['image'] = df_2['image'].map(lambda x: 'test_stg2/' + x)
df_2.head(2)

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,test_stg2/image_00001.jpg,0.264242,0.172134,0.011821,0.027255,0.051512,0.258965,0.07048,0.143592
1,test_stg2/image_00002.jpg,0.057599,0.001027,0.012868,0.000183,0.892273,0.002944,3e-06,0.033104


In [21]:
new_df = pd.concat([df_1, df_2])

In [22]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13153 entries, 0 to 12152
Data columns (total 9 columns):
image    13153 non-null object
ALB      13153 non-null float64
BET      13153 non-null float64
DOL      13153 non-null float64
LAG      13153 non-null float64
NoF      13153 non-null float64
OTHER    13153 non-null float64
SHARK    13153 non-null float64
YFT      13153 non-null float64
dtypes: float64(8), object(1)
memory usage: 1.0+ MB


In [23]:
new_df.reset_index(drop=True, inplace=True)

In [24]:
new_df.to_csv('f_c.csv')